Controller codebase refactoring

This commit is contained in:
gmarek
2015-07-31 13:38:04 +02:00
parent b73c53c37d
commit d27ad5b714
42 changed files with 91 additions and 89 deletions

View File

@@ -1,19 +0,0 @@
/*
Copyright 2014 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package nodecontroller contains code for syncing cloud instances with
// minion registry
package nodecontroller

View File

@@ -1,476 +0,0 @@
/*
Copyright 2014 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package nodecontroller
import (
"errors"
"fmt"
"net"
"time"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client/record"
"github.com/GoogleCloudPlatform/kubernetes/pkg/cloudprovider"
"github.com/GoogleCloudPlatform/kubernetes/pkg/fields"
"github.com/GoogleCloudPlatform/kubernetes/pkg/labels"
"github.com/GoogleCloudPlatform/kubernetes/pkg/types"
"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
"github.com/golang/glog"
)
var (
ErrRegistration = errors.New("unable to register all nodes.")
ErrQueryIPAddress = errors.New("unable to query IP address.")
ErrCloudInstance = errors.New("cloud provider doesn't support instances.")
)
const (
// nodeStatusUpdateRetry controls the number of retries of writing NodeStatus update.
nodeStatusUpdateRetry = 5
// controls how often NodeController will try to evict Pods from non-responsive Nodes.
nodeEvictionPeriod = 100 * time.Millisecond
)
type nodeStatusData struct {
probeTimestamp util.Time
readyTransitionTimestamp util.Time
status api.NodeStatus
}
type NodeController struct {
cloud cloudprovider.Interface
kubeClient client.Interface
recorder record.EventRecorder
registerRetryCount int
podEvictionTimeout time.Duration
deletingPodsRateLimiter util.RateLimiter
// worker that evicts pods from unresponsive nodes.
podEvictor *PodEvictor
// per Node map storing last observed Status together with a local time when it was observed.
// This timestamp is to be used instead of LastProbeTime stored in Condition. We do this
// to aviod the problem with time skew across the cluster.
nodeStatusMap map[string]nodeStatusData
// Value used if sync_nodes_status=False. NodeController will not proactively
// sync node status in this case, but will monitor node status updated from kubelet. If
// it doesn't receive update for this amount of time, it will start posting "NodeReady==
// ConditionUnknown". The amount of time before which NodeController start evicting pods
// is controlled via flag 'pod_eviction_timeout'.
// Note: be cautious when changing the constant, it must work with nodeStatusUpdateFrequency
// in kubelet. There are several constraints:
// 1. nodeMonitorGracePeriod must be N times more than nodeStatusUpdateFrequency, where
// N means number of retries allowed for kubelet to post node status. It is pointless
// to make nodeMonitorGracePeriod be less than nodeStatusUpdateFrequency, since there
// will only be fresh values from Kubelet at an interval of nodeStatusUpdateFrequency.
// The constant must be less than podEvictionTimeout.
// 2. nodeMonitorGracePeriod can't be too large for user experience - larger value takes
// longer for user to see up-to-date node status.
nodeMonitorGracePeriod time.Duration
// Value used if sync_nodes_status=False, only for node startup. When node
// is just created, e.g. cluster bootstrap or node creation, we give a longer grace period.
nodeStartupGracePeriod time.Duration
// Value controlling NodeController monitoring period, i.e. how often does NodeController
// check node status posted from kubelet. This value should be lower than nodeMonitorGracePeriod.
// TODO: Change node status monitor to watch based.
nodeMonitorPeriod time.Duration
clusterCIDR *net.IPNet
allocateNodeCIDRs bool
// Method for easy mocking in unittest.
lookupIP func(host string) ([]net.IP, error)
now func() util.Time
}
// NewNodeController returns a new node controller to sync instances from cloudprovider.
func NewNodeController(
cloud cloudprovider.Interface,
kubeClient client.Interface,
registerRetryCount int,
podEvictionTimeout time.Duration,
podEvictor *PodEvictor,
nodeMonitorGracePeriod time.Duration,
nodeStartupGracePeriod time.Duration,
nodeMonitorPeriod time.Duration,
clusterCIDR *net.IPNet,
allocateNodeCIDRs bool) *NodeController {
eventBroadcaster := record.NewBroadcaster()
recorder := eventBroadcaster.NewRecorder(api.EventSource{Component: "controllermanager"})
eventBroadcaster.StartLogging(glog.Infof)
if kubeClient != nil {
glog.Infof("Sending events to api server.")
eventBroadcaster.StartRecordingToSink(kubeClient.Events(""))
} else {
glog.Infof("No api server defined - no events will be sent to API server.")
}
if allocateNodeCIDRs && clusterCIDR == nil {
glog.Fatal("NodeController: Must specify clusterCIDR if allocateNodeCIDRs == true.")
}
return &NodeController{
cloud: cloud,
kubeClient: kubeClient,
recorder: recorder,
registerRetryCount: registerRetryCount,
podEvictionTimeout: podEvictionTimeout,
podEvictor: podEvictor,
nodeStatusMap: make(map[string]nodeStatusData),
nodeMonitorGracePeriod: nodeMonitorGracePeriod,
nodeMonitorPeriod: nodeMonitorPeriod,
nodeStartupGracePeriod: nodeStartupGracePeriod,
lookupIP: net.LookupIP,
now: util.Now,
clusterCIDR: clusterCIDR,
allocateNodeCIDRs: allocateNodeCIDRs,
}
}
// Generates num pod CIDRs that could be assigned to nodes.
func generateCIDRs(clusterCIDR *net.IPNet, num int) util.StringSet {
res := util.NewStringSet()
cidrIP := clusterCIDR.IP.To4()
for i := 0; i < num; i++ {
// TODO: Make the CIDRs configurable.
b1 := byte(i >> 8)
b2 := byte(i % 256)
res.Insert(fmt.Sprintf("%d.%d.%d.0/24", cidrIP[0], cidrIP[1]+b1, cidrIP[2]+b2))
}
return res
}
// reconcileNodeCIDRs looks at each node and assigns it a valid CIDR
// if it doesn't currently have one.
func (nc *NodeController) reconcileNodeCIDRs(nodes *api.NodeList) {
glog.V(4).Infof("Reconciling cidrs for %d nodes", len(nodes.Items))
// TODO(roberthbailey): This seems inefficient. Why re-calculate CIDRs
// on each sync period?
availableCIDRs := generateCIDRs(nc.clusterCIDR, len(nodes.Items))
for _, node := range nodes.Items {
if node.Spec.PodCIDR != "" {
glog.V(4).Infof("CIDR %s is already being used by node %s", node.Spec.PodCIDR, node.Name)
availableCIDRs.Delete(node.Spec.PodCIDR)
}
}
for _, node := range nodes.Items {
if node.Spec.PodCIDR == "" {
podCIDR, found := availableCIDRs.PopAny()
if !found {
nc.recordNodeStatusChange(&node, "No available CIDR")
continue
}
glog.V(4).Infof("Assigning node %s CIDR %s", node.Name, podCIDR)
node.Spec.PodCIDR = podCIDR
if _, err := nc.kubeClient.Nodes().Update(&node); err != nil {
nc.recordNodeStatusChange(&node, "CIDR assignment failed")
}
}
}
}
// Run starts an asynchronous loop that monitors the status of cluster nodes.
func (nc *NodeController) Run(period time.Duration) {
// Incorporate the results of node status pushed from kubelet to master.
go util.Forever(func() {
if err := nc.monitorNodeStatus(); err != nil {
glog.Errorf("Error monitoring node status: %v", err)
}
}, nc.nodeMonitorPeriod)
go util.Forever(func() {
nc.podEvictor.TryEvict(func(nodeName string) { nc.deletePods(nodeName) })
}, nodeEvictionPeriod)
}
func (nc *NodeController) recordNodeStatusChange(node *api.Node, new_status string) {
ref := &api.ObjectReference{
Kind: "Node",
Name: node.Name,
UID: types.UID(node.Name),
Namespace: "",
}
glog.V(2).Infof("Recording status change %s event message for node %s", new_status, node.Name)
// TODO: This requires a transaction, either both node status is updated
// and event is recorded or neither should happen, see issue #6055.
nc.recorder.Eventf(ref, new_status, "Node %s status is now: %s", node.Name, new_status)
}
func (nc *NodeController) recordNodeEvent(nodeName string, event string) {
ref := &api.ObjectReference{
Kind: "Node",
Name: nodeName,
UID: types.UID(nodeName),
Namespace: "",
}
glog.V(2).Infof("Recording %s event message for node %s", event, nodeName)
nc.recorder.Eventf(ref, event, "Node %s event: %s", nodeName, event)
}
// For a given node checks its conditions and tries to update it. Returns grace period to which given node
// is entitled, state of current and last observed Ready Condition, and an error if it ocured.
func (nc *NodeController) tryUpdateNodeStatus(node *api.Node) (time.Duration, api.NodeCondition, *api.NodeCondition, error) {
var err error
var gracePeriod time.Duration
var lastReadyCondition api.NodeCondition
readyCondition := nc.getCondition(&node.Status, api.NodeReady)
if readyCondition == nil {
// If ready condition is nil, then kubelet (or nodecontroller) never posted node status.
// A fake ready condition is created, where LastProbeTime and LastTransitionTime is set
// to node.CreationTimestamp to avoid handle the corner case.
lastReadyCondition = api.NodeCondition{
Type: api.NodeReady,
Status: api.ConditionUnknown,
LastHeartbeatTime: node.CreationTimestamp,
LastTransitionTime: node.CreationTimestamp,
}
gracePeriod = nc.nodeStartupGracePeriod
nc.nodeStatusMap[node.Name] = nodeStatusData{
status: node.Status,
probeTimestamp: node.CreationTimestamp,
readyTransitionTimestamp: node.CreationTimestamp,
}
} else {
// If ready condition is not nil, make a copy of it, since we may modify it in place later.
lastReadyCondition = *readyCondition
gracePeriod = nc.nodeMonitorGracePeriod
}
savedNodeStatus, found := nc.nodeStatusMap[node.Name]
// There are following cases to check:
// - both saved and new status have no Ready Condition set - we leave everything as it is,
// - saved status have no Ready Condition, but current one does - NodeController was restarted with Node data already present in etcd,
// - saved status have some Ready Condition, but current one does not - it's an error, but we fill it up because that's probably a good thing to do,
// - both saved and current statuses have Ready Conditions and they have the same LastProbeTime - nothing happened on that Node, it may be
// unresponsive, so we leave it as it is,
// - both saved and current statuses have Ready Conditions, they have different LastProbeTimes, but the same Ready Condition State -
// everything's in order, no transition occurred, we update only probeTimestamp,
// - both saved and current statuses have Ready Conditions, different LastProbeTimes and different Ready Condition State -
// Ready Condition changed it state since we last seen it, so we update both probeTimestamp and readyTransitionTimestamp.
// TODO: things to consider:
// - if 'LastProbeTime' have gone back in time its probably an error, currently we ignore it,
// - currently only correct Ready State transition outside of Node Controller is marking it ready by Kubelet, we don't check
// if that's the case, but it does not seem necessary.
savedCondition := nc.getCondition(&savedNodeStatus.status, api.NodeReady)
observedCondition := nc.getCondition(&node.Status, api.NodeReady)
if !found {
glog.Warningf("Missing timestamp for Node %s. Assuming now as a timestamp.", node.Name)
savedNodeStatus = nodeStatusData{
status: node.Status,
probeTimestamp: nc.now(),
readyTransitionTimestamp: nc.now(),
}
nc.nodeStatusMap[node.Name] = savedNodeStatus
} else if savedCondition == nil && observedCondition != nil {
glog.V(1).Infof("Creating timestamp entry for newly observed Node %s", node.Name)
savedNodeStatus = nodeStatusData{
status: node.Status,
probeTimestamp: nc.now(),
readyTransitionTimestamp: nc.now(),
}
nc.nodeStatusMap[node.Name] = savedNodeStatus
} else if savedCondition != nil && observedCondition == nil {
glog.Errorf("ReadyCondition was removed from Status of Node %s", node.Name)
// TODO: figure out what to do in this case. For now we do the same thing as above.
savedNodeStatus = nodeStatusData{
status: node.Status,
probeTimestamp: nc.now(),
readyTransitionTimestamp: nc.now(),
}
nc.nodeStatusMap[node.Name] = savedNodeStatus
} else if savedCondition != nil && observedCondition != nil && savedCondition.LastHeartbeatTime != observedCondition.LastHeartbeatTime {
var transitionTime util.Time
// If ReadyCondition changed since the last time we checked, we update the transition timestamp to "now",
// otherwise we leave it as it is.
if savedCondition.LastTransitionTime != observedCondition.LastTransitionTime {
glog.V(3).Infof("ReadyCondition for Node %s transitioned from %v to %v", node.Name, savedCondition.Status, observedCondition)
transitionTime = nc.now()
} else {
transitionTime = savedNodeStatus.readyTransitionTimestamp
}
glog.V(3).Infof("Nodes ReadyCondition updated. Updating timestamp: %+v\n vs %+v.", savedNodeStatus.status, node.Status)
savedNodeStatus = nodeStatusData{
status: node.Status,
probeTimestamp: nc.now(),
readyTransitionTimestamp: transitionTime,
}
nc.nodeStatusMap[node.Name] = savedNodeStatus
}
if nc.now().After(savedNodeStatus.probeTimestamp.Add(gracePeriod)) {
// NodeReady condition was last set longer ago than gracePeriod, so update it to Unknown
// (regardless of its current value) in the master, without contacting kubelet.
if readyCondition == nil {
glog.V(2).Infof("node %v is never updated by kubelet", node.Name)
node.Status.Conditions = append(node.Status.Conditions, api.NodeCondition{
Type: api.NodeReady,
Status: api.ConditionUnknown,
Reason: fmt.Sprintf("Kubelet never posted node status."),
LastHeartbeatTime: node.CreationTimestamp,
LastTransitionTime: nc.now(),
})
} else {
glog.V(2).Infof("node %v hasn't been updated for %+v. Last ready condition is: %+v",
node.Name, nc.now().Time.Sub(savedNodeStatus.probeTimestamp.Time), lastReadyCondition)
if lastReadyCondition.Status != api.ConditionUnknown {
readyCondition.Status = api.ConditionUnknown
readyCondition.Reason = fmt.Sprintf("Kubelet stopped posting node status.")
// LastProbeTime is the last time we heard from kubelet.
readyCondition.LastHeartbeatTime = lastReadyCondition.LastHeartbeatTime
readyCondition.LastTransitionTime = nc.now()
}
}
if !api.Semantic.DeepEqual(nc.getCondition(&node.Status, api.NodeReady), lastReadyCondition) {
if _, err = nc.kubeClient.Nodes().UpdateStatus(node); err != nil {
glog.Errorf("Error updating node %s: %v", node.Name, err)
return gracePeriod, lastReadyCondition, readyCondition, err
} else {
nc.nodeStatusMap[node.Name] = nodeStatusData{
status: node.Status,
probeTimestamp: nc.nodeStatusMap[node.Name].probeTimestamp,
readyTransitionTimestamp: nc.now(),
}
return gracePeriod, lastReadyCondition, readyCondition, nil
}
}
}
return gracePeriod, lastReadyCondition, readyCondition, err
}
// monitorNodeStatus verifies node status are constantly updated by kubelet, and if not,
// post "NodeReady==ConditionUnknown". It also evicts all pods if node is not ready or
// not reachable for a long period of time.
func (nc *NodeController) monitorNodeStatus() error {
nodes, err := nc.kubeClient.Nodes().List(labels.Everything(), fields.Everything())
if err != nil {
return err
}
if nc.allocateNodeCIDRs {
// TODO (cjcullen): Use pkg/controller/framework to watch nodes and
// reduce lists/decouple this from monitoring status.
nc.reconcileNodeCIDRs(nodes)
}
for i := range nodes.Items {
var gracePeriod time.Duration
var lastReadyCondition api.NodeCondition
var readyCondition *api.NodeCondition
node := &nodes.Items[i]
for rep := 0; rep < nodeStatusUpdateRetry; rep++ {
gracePeriod, lastReadyCondition, readyCondition, err = nc.tryUpdateNodeStatus(node)
if err == nil {
break
}
name := node.Name
node, err = nc.kubeClient.Nodes().Get(name)
if err != nil {
glog.Errorf("Failed while getting a Node to retry updating NodeStatus. Probably Node %s was deleted.", name)
break
}
}
if err != nil {
glog.Errorf("Update status of Node %v from NodeController exceeds retry count."+
"Skipping - no pods will be evicted.", node.Name)
continue
}
decisionTimestamp := nc.now()
if readyCondition != nil {
// Check eviction timeout against decisionTimestamp
if lastReadyCondition.Status == api.ConditionFalse &&
decisionTimestamp.After(nc.nodeStatusMap[node.Name].readyTransitionTimestamp.Add(nc.podEvictionTimeout)) {
if nc.podEvictor.AddNodeToEvict(node.Name) {
glog.Infof("Adding pods to evict: %v is later than %v + %v", decisionTimestamp, nc.nodeStatusMap[node.Name].readyTransitionTimestamp, nc.podEvictionTimeout)
}
}
if lastReadyCondition.Status == api.ConditionUnknown &&
decisionTimestamp.After(nc.nodeStatusMap[node.Name].probeTimestamp.Add(nc.podEvictionTimeout-gracePeriod)) {
if nc.podEvictor.AddNodeToEvict(node.Name) {
glog.Infof("Adding pods to evict2: %v is later than %v + %v", decisionTimestamp, nc.nodeStatusMap[node.Name].readyTransitionTimestamp, nc.podEvictionTimeout-gracePeriod)
}
}
if lastReadyCondition.Status == api.ConditionTrue {
if nc.podEvictor.RemoveNodeToEvict(node.Name) {
glog.Infof("Pods on %v won't be evicted", node.Name)
}
}
// Report node event.
if readyCondition.Status != api.ConditionTrue && lastReadyCondition.Status == api.ConditionTrue {
nc.recordNodeStatusChange(node, "NodeNotReady")
}
// Check with the cloud provider to see if the node still exists. If it
// doesn't, delete the node and all pods scheduled on the node.
if readyCondition.Status != api.ConditionTrue && nc.cloud != nil {
instances, ok := nc.cloud.Instances()
if !ok {
glog.Errorf("%v", ErrCloudInstance)
continue
}
if _, err := instances.ExternalID(node.Name); err != nil && err == cloudprovider.InstanceNotFound {
glog.Infof("Deleting node (no longer present in cloud provider): %s", node.Name)
nc.recordNodeEvent(node.Name, fmt.Sprintf("Deleting Node %v because it's not present according to cloud provider", node.Name))
if err := nc.kubeClient.Nodes().Delete(node.Name); err != nil {
glog.Errorf("Unable to delete node %s: %v", node.Name, err)
continue
}
if err := nc.deletePods(node.Name); err != nil {
glog.Errorf("Unable to delete pods from node %s: %v", node.Name, err)
}
}
}
}
}
return nil
}
// deletePods will delete all pods from master running on given node.
func (nc *NodeController) deletePods(nodeID string) error {
glog.V(2).Infof("Delete all pods from %v", nodeID)
pods, err := nc.kubeClient.Pods(api.NamespaceAll).List(labels.Everything(),
fields.OneTermEqualSelector(client.PodHost, nodeID))
if err != nil {
return err
}
nc.recordNodeEvent(nodeID, fmt.Sprintf("Deleting all Pods from Node %v.", nodeID))
for _, pod := range pods.Items {
// Defensive check, also needed for tests.
if pod.Spec.NodeName != nodeID {
continue
}
glog.V(2).Infof("Delete pod %v", pod.Name)
nc.recorder.Eventf(&pod, "NodeControllerEviction", "Deleting Pod %s from Node %s", pod.Name, nodeID)
if err := nc.kubeClient.Pods(pod.Namespace).Delete(pod.Name, nil); err != nil {
glog.Errorf("Error deleting pod %v: %v", pod.Name, err)
}
}
return nil
}
// getCondition returns a condition object for the specific condition
// type, nil if the condition is not set.
func (nc *NodeController) getCondition(status *api.NodeStatus, conditionType api.NodeConditionType) *api.NodeCondition {
if status == nil {
return nil
}
for i := range status.Conditions {
if status.Conditions[i].Type == conditionType {
return &status.Conditions[i]
}
}
return nil
}

View File

@@ -1,612 +0,0 @@
/*
Copyright 2014 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package nodecontroller
import (
"errors"
"fmt"
"sort"
"sync"
"testing"
"time"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
apierrors "github.com/GoogleCloudPlatform/kubernetes/pkg/api/errors"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api/resource"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client/testclient"
"github.com/GoogleCloudPlatform/kubernetes/pkg/fields"
"github.com/GoogleCloudPlatform/kubernetes/pkg/labels"
"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
"github.com/GoogleCloudPlatform/kubernetes/pkg/watch"
)
const (
testNodeMonitorGracePeriod = 40 * time.Second
testNodeStartupGracePeriod = 60 * time.Second
testNodeMonitorPeriod = 5 * time.Second
)
// FakeNodeHandler is a fake implementation of NodesInterface and NodeInterface. It
// allows test cases to have fine-grained control over mock behaviors. We also need
// PodsInterface and PodInterface to test list & delet pods, which is implemented in
// the embeded client.Fake field.
type FakeNodeHandler struct {
*testclient.Fake
// Input: Hooks determine if request is valid or not
CreateHook func(*FakeNodeHandler, *api.Node) bool
Existing []*api.Node
// Output
CreatedNodes []*api.Node
DeletedNodes []*api.Node
UpdatedNodes []*api.Node
UpdatedNodeStatuses []*api.Node
RequestCount int
// Synchronization
createLock sync.Mutex
}
func (c *FakeNodeHandler) Nodes() client.NodeInterface {
return c
}
func (m *FakeNodeHandler) Create(node *api.Node) (*api.Node, error) {
m.createLock.Lock()
defer func() {
m.RequestCount++
m.createLock.Unlock()
}()
for _, n := range m.Existing {
if n.Name == node.Name {
return nil, apierrors.NewAlreadyExists("Minion", node.Name)
}
}
if m.CreateHook == nil || m.CreateHook(m, node) {
nodeCopy := *node
m.CreatedNodes = append(m.CreatedNodes, &nodeCopy)
return node, nil
} else {
return nil, errors.New("Create error.")
}
}
func (m *FakeNodeHandler) Get(name string) (*api.Node, error) {
return nil, nil
}
func (m *FakeNodeHandler) List(label labels.Selector, field fields.Selector) (*api.NodeList, error) {
defer func() { m.RequestCount++ }()
var nodes []*api.Node
for i := 0; i < len(m.UpdatedNodes); i++ {
if !contains(m.UpdatedNodes[i], m.DeletedNodes) {
nodes = append(nodes, m.UpdatedNodes[i])
}
}
for i := 0; i < len(m.Existing); i++ {
if !contains(m.Existing[i], m.DeletedNodes) && !contains(m.Existing[i], nodes) {
nodes = append(nodes, m.Existing[i])
}
}
for i := 0; i < len(m.CreatedNodes); i++ {
if !contains(m.Existing[i], m.DeletedNodes) && !contains(m.CreatedNodes[i], nodes) {
nodes = append(nodes, m.CreatedNodes[i])
}
}
nodeList := &api.NodeList{}
for _, node := range nodes {
nodeList.Items = append(nodeList.Items, *node)
}
return nodeList, nil
}
func (m *FakeNodeHandler) Delete(id string) error {
m.DeletedNodes = append(m.DeletedNodes, newNode(id))
m.RequestCount++
return nil
}
func (m *FakeNodeHandler) Update(node *api.Node) (*api.Node, error) {
nodeCopy := *node
m.UpdatedNodes = append(m.UpdatedNodes, &nodeCopy)
m.RequestCount++
return node, nil
}
func (m *FakeNodeHandler) UpdateStatus(node *api.Node) (*api.Node, error) {
nodeCopy := *node
m.UpdatedNodeStatuses = append(m.UpdatedNodeStatuses, &nodeCopy)
m.RequestCount++
return node, nil
}
func (m *FakeNodeHandler) Watch(label labels.Selector, field fields.Selector, resourceVersion string) (watch.Interface, error) {
return nil, nil
}
func TestMonitorNodeStatusEvictPods(t *testing.T) {
fakeNow := util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC)
evictionTimeout := 10 * time.Minute
table := []struct {
fakeNodeHandler *FakeNodeHandler
timeToPass time.Duration
newNodeStatus api.NodeStatus
expectedEvictPods bool
description string
}{
// Node created recently, with no status (happens only at cluster startup).
{
fakeNodeHandler: &FakeNodeHandler{
Existing: []*api.Node{
{
ObjectMeta: api.ObjectMeta{
Name: "node0",
CreationTimestamp: fakeNow,
},
},
},
Fake: testclient.NewSimpleFake(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
},
timeToPass: 0,
newNodeStatus: api.NodeStatus{},
expectedEvictPods: false,
description: "Node created recently, with no status.",
},
// Node created long time ago, and kubelet posted NotReady for a short period of time.
{
fakeNodeHandler: &FakeNodeHandler{
Existing: []*api.Node{
{
ObjectMeta: api.ObjectMeta{
Name: "node0",
CreationTimestamp: util.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
Status: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionFalse,
LastHeartbeatTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
},
Fake: testclient.NewSimpleFake(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
},
timeToPass: evictionTimeout,
newNodeStatus: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionFalse,
// Node status has just been updated, and is NotReady for 10min.
LastHeartbeatTime: util.Date(2015, 1, 1, 12, 9, 0, 0, time.UTC),
LastTransitionTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
expectedEvictPods: false,
description: "Node created long time ago, and kubelet posted NotReady for a short period of time.",
},
// Node created long time ago, and kubelet posted NotReady for a long period of time.
{
fakeNodeHandler: &FakeNodeHandler{
Existing: []*api.Node{
{
ObjectMeta: api.ObjectMeta{
Name: "node0",
CreationTimestamp: util.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
Status: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionFalse,
LastHeartbeatTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
},
Fake: testclient.NewSimpleFake(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
},
timeToPass: time.Hour,
newNodeStatus: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionFalse,
// Node status has just been updated, and is NotReady for 1hr.
LastHeartbeatTime: util.Date(2015, 1, 1, 12, 59, 0, 0, time.UTC),
LastTransitionTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
expectedEvictPods: true,
description: "Node created long time ago, and kubelet posted NotReady for a long period of time.",
},
// Node created long time ago, node controller posted Unknown for a short period of time.
{
fakeNodeHandler: &FakeNodeHandler{
Existing: []*api.Node{
{
ObjectMeta: api.ObjectMeta{
Name: "node0",
CreationTimestamp: util.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
Status: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionUnknown,
LastHeartbeatTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
},
Fake: testclient.NewSimpleFake(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
},
timeToPass: evictionTimeout - testNodeMonitorGracePeriod,
newNodeStatus: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionUnknown,
// Node status was updated by nodecontroller 10min ago
LastHeartbeatTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
expectedEvictPods: false,
description: "Node created long time ago, node controller posted Unknown for a short period of time.",
},
// Node created long time ago, node controller posted Unknown for a long period of time.
{
fakeNodeHandler: &FakeNodeHandler{
Existing: []*api.Node{
{
ObjectMeta: api.ObjectMeta{
Name: "node0",
CreationTimestamp: util.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
Status: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionUnknown,
LastHeartbeatTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
},
},
Fake: testclient.NewSimpleFake(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
},
timeToPass: 60 * time.Minute,
newNodeStatus: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionUnknown,
// Node status was updated by nodecontroller 1hr ago
LastHeartbeatTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
},
expectedEvictPods: true,
description: "Node created long time ago, node controller posted Unknown for a long period of time.",
},
}
for _, item := range table {
podEvictor := NewPodEvictor(util.NewFakeRateLimiter())
nodeController := NewNodeController(nil, item.fakeNodeHandler, 10,
evictionTimeout, podEvictor, testNodeMonitorGracePeriod,
testNodeStartupGracePeriod, testNodeMonitorPeriod, nil, false)
nodeController.now = func() util.Time { return fakeNow }
if err := nodeController.monitorNodeStatus(); err != nil {
t.Errorf("unexpected error: %v", err)
}
if item.timeToPass > 0 {
nodeController.now = func() util.Time { return util.Time{Time: fakeNow.Add(item.timeToPass)} }
item.fakeNodeHandler.Existing[0].Status = item.newNodeStatus
}
if err := nodeController.monitorNodeStatus(); err != nil {
t.Errorf("unexpected error: %v", err)
}
podEvictor.TryEvict(func(nodeName string) { nodeController.deletePods(nodeName) })
podEvicted := false
for _, action := range item.fakeNodeHandler.Actions() {
if action.Action == "delete-pod" {
podEvicted = true
}
}
if item.expectedEvictPods != podEvicted {
t.Errorf("expected pod eviction: %+v, got %+v for %+v", item.expectedEvictPods,
podEvicted, item.description)
}
}
}
func TestMonitorNodeStatusUpdateStatus(t *testing.T) {
fakeNow := util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC)
table := []struct {
fakeNodeHandler *FakeNodeHandler
timeToPass time.Duration
newNodeStatus api.NodeStatus
expectedEvictPods bool
expectedRequestCount int
expectedNodes []*api.Node
}{
// Node created long time ago, without status:
// Expect Unknown status posted from node controller.
{
fakeNodeHandler: &FakeNodeHandler{
Existing: []*api.Node{
{
ObjectMeta: api.ObjectMeta{
Name: "node0",
CreationTimestamp: util.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
},
},
Fake: testclient.NewSimpleFake(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
},
expectedRequestCount: 2, // List+Update
expectedNodes: []*api.Node{
{
ObjectMeta: api.ObjectMeta{
Name: "node0",
CreationTimestamp: util.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
Status: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionUnknown,
Reason: fmt.Sprintf("Kubelet never posted node status."),
LastHeartbeatTime: util.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
LastTransitionTime: fakeNow,
},
},
},
},
},
},
// Node created recently, without status.
// Expect no action from node controller (within startup grace period).
{
fakeNodeHandler: &FakeNodeHandler{
Existing: []*api.Node{
{
ObjectMeta: api.ObjectMeta{
Name: "node0",
CreationTimestamp: fakeNow,
},
},
},
Fake: testclient.NewSimpleFake(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
},
expectedRequestCount: 1, // List
expectedNodes: nil,
},
// Node created long time ago, with status updated by kubelet exceeds grace period.
// Expect Unknown status posted from node controller.
{
fakeNodeHandler: &FakeNodeHandler{
Existing: []*api.Node{
{
ObjectMeta: api.ObjectMeta{
Name: "node0",
CreationTimestamp: util.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
Status: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionTrue,
// Node status hasn't been updated for 1hr.
LastHeartbeatTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
Capacity: api.ResourceList{
api.ResourceName(api.ResourceCPU): resource.MustParse("10"),
api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
},
},
Spec: api.NodeSpec{
ExternalID: "node0",
},
},
},
Fake: testclient.NewSimpleFake(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
},
expectedRequestCount: 3, // (List+)List+Update
timeToPass: time.Hour,
newNodeStatus: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionTrue,
// Node status hasn't been updated for 1hr.
LastHeartbeatTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
},
},
Capacity: api.ResourceList{
api.ResourceName(api.ResourceCPU): resource.MustParse("10"),
api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
},
},
expectedNodes: []*api.Node{
{
ObjectMeta: api.ObjectMeta{
Name: "node0",
CreationTimestamp: util.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
Status: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionUnknown,
Reason: fmt.Sprintf("Kubelet stopped posting node status."),
LastHeartbeatTime: util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
LastTransitionTime: util.Time{util.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC).Add(time.Hour)},
},
},
Capacity: api.ResourceList{
api.ResourceName(api.ResourceCPU): resource.MustParse("10"),
api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
},
},
Spec: api.NodeSpec{
ExternalID: "node0",
},
},
},
},
// Node created long time ago, with status updated recently.
// Expect no action from node controller (within monitor grace period).
{
fakeNodeHandler: &FakeNodeHandler{
Existing: []*api.Node{
{
ObjectMeta: api.ObjectMeta{
Name: "node0",
CreationTimestamp: util.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
},
Status: api.NodeStatus{
Conditions: []api.NodeCondition{
{
Type: api.NodeReady,
Status: api.ConditionTrue,
// Node status has just been updated.
LastHeartbeatTime: fakeNow,
LastTransitionTime: fakeNow,
},
},
Capacity: api.ResourceList{
api.ResourceName(api.ResourceCPU): resource.MustParse("10"),
api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
},
},
Spec: api.NodeSpec{
ExternalID: "node0",
},
},
},
Fake: testclient.NewSimpleFake(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
},
expectedRequestCount: 1, // List
expectedNodes: nil,
},
}
for _, item := range table {
nodeController := NewNodeController(nil, item.fakeNodeHandler, 10, 5*time.Minute, NewPodEvictor(util.NewFakeRateLimiter()),
testNodeMonitorGracePeriod, testNodeStartupGracePeriod, testNodeMonitorPeriod, nil, false)
nodeController.now = func() util.Time { return fakeNow }
if err := nodeController.monitorNodeStatus(); err != nil {
t.Errorf("unexpected error: %v", err)
}
if item.timeToPass > 0 {
nodeController.now = func() util.Time { return util.Time{Time: fakeNow.Add(item.timeToPass)} }
item.fakeNodeHandler.Existing[0].Status = item.newNodeStatus
if err := nodeController.monitorNodeStatus(); err != nil {
t.Errorf("unexpected error: %v", err)
}
}
if item.expectedRequestCount != item.fakeNodeHandler.RequestCount {
t.Errorf("expected %v call, but got %v.", item.expectedRequestCount, item.fakeNodeHandler.RequestCount)
}
if len(item.fakeNodeHandler.UpdatedNodes) > 0 && !api.Semantic.DeepEqual(item.expectedNodes, item.fakeNodeHandler.UpdatedNodes) {
t.Errorf("expected nodes %+v, got %+v", item.expectedNodes[0],
item.fakeNodeHandler.UpdatedNodes[0])
}
}
}
func newNode(name string) *api.Node {
return &api.Node{
ObjectMeta: api.ObjectMeta{Name: name},
Spec: api.NodeSpec{
ExternalID: name,
},
Status: api.NodeStatus{
Capacity: api.ResourceList{
api.ResourceName(api.ResourceCPU): resource.MustParse("10"),
api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
},
},
}
}
func newPod(name, host string) *api.Pod {
return &api.Pod{ObjectMeta: api.ObjectMeta{Name: name}, Spec: api.PodSpec{NodeName: host}}
}
func sortedNodeNames(nodes []*api.Node) []string {
nodeNames := []string{}
for _, node := range nodes {
nodeNames = append(nodeNames, node.Name)
}
sort.Strings(nodeNames)
return nodeNames
}
func sortedNodeAddresses(nodes []*api.Node) []string {
nodeAddresses := []string{}
for _, node := range nodes {
for _, addr := range node.Status.Addresses {
nodeAddresses = append(nodeAddresses, addr.Address)
}
}
sort.Strings(nodeAddresses)
return nodeAddresses
}
func sortedNodeExternalIDs(nodes []*api.Node) []string {
nodeExternalIDs := []string{}
for _, node := range nodes {
nodeExternalIDs = append(nodeExternalIDs, node.Spec.ExternalID)
}
sort.Strings(nodeExternalIDs)
return nodeExternalIDs
}
func contains(node *api.Node, nodes []*api.Node) bool {
for i := 0; i < len(nodes); i++ {
if node.Name == nodes[i].Name {
return true
}
}
return false
}

View File

@@ -1,129 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package nodecontroller
import (
"sync"
"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
"github.com/golang/glog"
)
// A FIFO queue which additionally guarantees that any element can be added only once until
// it is removed.
type UniqueQueue struct {
lock sync.Mutex
queue []string
set util.StringSet
}
// Entity responsible for evicting Pods from inserted Nodes. It uses RateLimiter to avoid
// evicting everything at once. Note that we rate limit eviction of Nodes not individual Pods.
type PodEvictor struct {
queue UniqueQueue
deletingPodsRateLimiter util.RateLimiter
}
// Adds a new value to the queue if it wasn't added before, or was explicitly removed by the
// Remove call. Returns true if new value was added.
func (q *UniqueQueue) Add(value string) bool {
q.lock.Lock()
defer q.lock.Unlock()
if !q.set.Has(value) {
q.queue = append(q.queue, value)
q.set.Insert(value)
return true
} else {
return false
}
}
// Removes the value from the queue, so Get() call won't return it, and allow subsequent addition
// of the given value. If the value is not present does nothing and returns false.
func (q *UniqueQueue) Remove(value string) bool {
q.lock.Lock()
defer q.lock.Unlock()
q.set.Delete(value)
for i, val := range q.queue {
if val == value {
if i > 0 && i < len(q.queue)-1 {
q.queue = append(q.queue[0:i], q.queue[i+1:len(q.queue)]...)
} else if i > 0 {
q.queue = q.queue[0 : len(q.queue)-1]
} else {
q.queue = q.queue[1:len(q.queue)]
}
return true
}
}
return false
}
// Returns the oldest added value that wasn't returned yet.
func (q *UniqueQueue) Get() (string, bool) {
q.lock.Lock()
defer q.lock.Unlock()
if len(q.queue) == 0 {
return "", false
}
result := q.queue[0]
q.queue = q.queue[1:len(q.queue)]
return result, true
}
// Creates new PodEvictor which will use given RateLimiter to oversee eviction.
func NewPodEvictor(deletingPodsRateLimiter util.RateLimiter) *PodEvictor {
return &PodEvictor{
queue: UniqueQueue{
queue: make([]string, 0),
set: util.NewStringSet(),
},
deletingPodsRateLimiter: deletingPodsRateLimiter,
}
}
// Tries to evict all Pods from previously inserted Nodes. Ends prematurely if RateLimiter forbids any eviction.
// Each Node is processed only once, as long as it's not Removed, i.e. calling multiple AddNodeToEvict does not result
// with multiple evictions as long as RemoveNodeToEvict is not called.
func (pe *PodEvictor) TryEvict(delFunc func(string)) {
val, ok := pe.queue.Get()
for ok {
if pe.deletingPodsRateLimiter.CanAccept() {
glog.Infof("PodEvictor is evicting Pods on Node: %v", val)
delFunc(val)
} else {
glog.V(1).Info("PodEvictor is rate limitted.")
break
}
val, ok = pe.queue.Get()
}
}
// Adds Node to the Evictor to be processed later. Won't add the same Node second time if it was already
// added and not removed.
func (pe *PodEvictor) AddNodeToEvict(nodeName string) bool {
return pe.queue.Add(nodeName)
}
// Removes Node from the Evictor. The Node won't be processed until added again.
func (pe *PodEvictor) RemoveNodeToEvict(nodeName string) bool {
return pe.queue.Remove(nodeName)
}

View File

@@ -1,146 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package nodecontroller
import (
"testing"
"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
)
func CheckQueueEq(lhs, rhs []string) bool {
for i := 0; i < len(lhs); i++ {
if rhs[i] != lhs[i] {
return false
}
}
return true
}
func CheckSetEq(lhs, rhs util.StringSet) bool {
return lhs.HasAll(rhs.List()...) && rhs.HasAll(lhs.List()...)
}
func TestAddNode(t *testing.T) {
evictor := NewPodEvictor(util.NewFakeRateLimiter())
evictor.AddNodeToEvict("first")
evictor.AddNodeToEvict("second")
evictor.AddNodeToEvict("third")
queuePattern := []string{"first", "second", "third"}
if len(evictor.queue.queue) != len(queuePattern) {
t.Fatalf("Queue %v should have lenght %d", evictor.queue.queue, len(queuePattern))
}
if !CheckQueueEq(queuePattern, evictor.queue.queue) {
t.Errorf("Invalid queue. Got %v, expected %v", evictor.queue.queue, queuePattern)
}
setPattern := util.NewStringSet("first", "second", "third")
if len(evictor.queue.set) != len(setPattern) {
t.Fatalf("Map %v should have length %d", evictor.queue.set, len(setPattern))
}
if !CheckSetEq(setPattern, evictor.queue.set) {
t.Errorf("Invalid map. Got %v, expected %v", evictor.queue.set, setPattern)
}
}
func TestDelNode(t *testing.T) {
evictor := NewPodEvictor(util.NewFakeRateLimiter())
evictor.AddNodeToEvict("first")
evictor.AddNodeToEvict("second")
evictor.AddNodeToEvict("third")
evictor.RemoveNodeToEvict("first")
queuePattern := []string{"second", "third"}
if len(evictor.queue.queue) != len(queuePattern) {
t.Fatalf("Queue %v should have length %d", evictor.queue.queue, len(queuePattern))
}
if !CheckQueueEq(queuePattern, evictor.queue.queue) {
t.Errorf("Invalid queue. Got %v, expected %v", evictor.queue.queue, queuePattern)
}
setPattern := util.NewStringSet("second", "third")
if len(evictor.queue.set) != len(setPattern) {
t.Fatalf("Map %v should have length %d", evictor.queue.set, len(setPattern))
}
if !CheckSetEq(setPattern, evictor.queue.set) {
t.Errorf("Invalid map. Got %v, expected %v", evictor.queue.set, setPattern)
}
evictor = NewPodEvictor(util.NewFakeRateLimiter())
evictor.AddNodeToEvict("first")
evictor.AddNodeToEvict("second")
evictor.AddNodeToEvict("third")
evictor.RemoveNodeToEvict("second")
queuePattern = []string{"first", "third"}
if len(evictor.queue.queue) != len(queuePattern) {
t.Fatalf("Queue %v should have lenght %d", evictor.queue.queue, len(queuePattern))
}
if !CheckQueueEq(queuePattern, evictor.queue.queue) {
t.Errorf("Invalid queue. Got %v, expected %v", evictor.queue.queue, queuePattern)
}
setPattern = util.NewStringSet("first", "third")
if len(evictor.queue.set) != len(setPattern) {
t.Fatalf("Map %v should have length %d", evictor.queue.set, len(setPattern))
}
if !CheckSetEq(setPattern, evictor.queue.set) {
t.Errorf("Invalid map. Got %v, expected %v", evictor.queue.set, setPattern)
}
evictor = NewPodEvictor(util.NewFakeRateLimiter())
evictor.AddNodeToEvict("first")
evictor.AddNodeToEvict("second")
evictor.AddNodeToEvict("third")
evictor.RemoveNodeToEvict("third")
queuePattern = []string{"first", "second"}
if len(evictor.queue.queue) != len(queuePattern) {
t.Fatalf("Queue %v should have lenght %d", evictor.queue.queue, len(queuePattern))
}
if !CheckQueueEq(queuePattern, evictor.queue.queue) {
t.Errorf("Invalid queue. Got %v, expected %v", evictor.queue.queue, queuePattern)
}
setPattern = util.NewStringSet("first", "second")
if len(evictor.queue.set) != len(setPattern) {
t.Fatalf("Map %v should have length %d", evictor.queue.set, len(setPattern))
}
if !CheckSetEq(setPattern, evictor.queue.set) {
t.Errorf("Invalid map. Got %v, expected %v", evictor.queue.set, setPattern)
}
}
func TestEvictNode(t *testing.T) {
evictor := NewPodEvictor(util.NewFakeRateLimiter())
evictor.AddNodeToEvict("first")
evictor.AddNodeToEvict("second")
evictor.AddNodeToEvict("third")
evictor.RemoveNodeToEvict("second")
deletedMap := util.NewStringSet()
evictor.TryEvict(func(nodeName string) { deletedMap.Insert(nodeName) })
setPattern := util.NewStringSet("first", "third")
if len(deletedMap) != len(setPattern) {
t.Fatalf("Map %v should have length %d", evictor.queue.set, len(setPattern))
}
if !CheckSetEq(setPattern, deletedMap) {
t.Errorf("Invalid map. Got %v, expected %v", deletedMap, setPattern)
}
}

View File

@@ -1,19 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package routecontroller contains code for syncing cloud routing rules with
// the list of registered nodes.
package routecontroller

View File

@@ -1,128 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package routecontroller
import (
"fmt"
"net"
"time"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
"github.com/GoogleCloudPlatform/kubernetes/pkg/cloudprovider"
"github.com/GoogleCloudPlatform/kubernetes/pkg/fields"
"github.com/GoogleCloudPlatform/kubernetes/pkg/labels"
"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
"github.com/golang/glog"
)
type RouteController struct {
routes cloudprovider.Routes
kubeClient client.Interface
clusterName string
clusterCIDR *net.IPNet
}
func New(routes cloudprovider.Routes, kubeClient client.Interface, clusterName string, clusterCIDR *net.IPNet) *RouteController {
return &RouteController{
routes: routes,
kubeClient: kubeClient,
clusterName: clusterName,
clusterCIDR: clusterCIDR,
}
}
func (rc *RouteController) Run(syncPeriod time.Duration) {
go util.Forever(func() {
if err := rc.reconcileNodeRoutes(); err != nil {
glog.Errorf("Couldn't reconcile node routes: %v", err)
}
}, syncPeriod)
}
func (rc *RouteController) reconcileNodeRoutes() error {
routeList, err := rc.routes.ListRoutes(rc.clusterName)
if err != nil {
return fmt.Errorf("error listing routes: %v", err)
}
// TODO (cjcullen): use pkg/controller/framework.NewInformer to watch this
// and reduce the number of lists needed.
nodeList, err := rc.kubeClient.Nodes().List(labels.Everything(), fields.Everything())
if err != nil {
return fmt.Errorf("error listing nodes: %v", err)
}
return rc.reconcile(nodeList.Items, routeList)
}
func (rc *RouteController) reconcile(nodes []api.Node, routes []*cloudprovider.Route) error {
// nodeCIDRs maps nodeName->nodeCIDR
nodeCIDRs := make(map[string]string)
// routeMap maps routeTargetInstance->route
routeMap := make(map[string]*cloudprovider.Route)
for _, route := range routes {
routeMap[route.TargetInstance] = route
}
for _, node := range nodes {
// Check if we have a route for this node w/ the correct CIDR.
r := routeMap[node.Name]
if r == nil || r.DestinationCIDR != node.Spec.PodCIDR {
// If not, create the route.
route := &cloudprovider.Route{
TargetInstance: node.Name,
DestinationCIDR: node.Spec.PodCIDR,
}
nameHint := string(node.UID)
go func(nameHint string, route *cloudprovider.Route) {
if err := rc.routes.CreateRoute(rc.clusterName, nameHint, route); err != nil {
glog.Errorf("Could not create route %s %s: %v", nameHint, route.DestinationCIDR, err)
}
}(nameHint, route)
}
nodeCIDRs[node.Name] = node.Spec.PodCIDR
}
for _, route := range routes {
if rc.isResponsibleForRoute(route) {
// Check if this route applies to a node we know about & has correct CIDR.
if nodeCIDRs[route.TargetInstance] != route.DestinationCIDR {
// Delete the route.
go func(route *cloudprovider.Route) {
if err := rc.routes.DeleteRoute(rc.clusterName, route); err != nil {
glog.Errorf("Could not delete route %s %s: %v", route.Name, route.DestinationCIDR, err)
}
}(route)
}
}
}
return nil
}
func (rc *RouteController) isResponsibleForRoute(route *cloudprovider.Route) bool {
_, cidr, err := net.ParseCIDR(route.DestinationCIDR)
if err != nil {
glog.Errorf("Ignoring route %s, unparsable CIDR: %v", route.Name, err)
return false
}
// Not responsible if this route's CIDR is not within our clusterCIDR
lastIP := make([]byte, len(cidr.IP))
for i := range lastIP {
lastIP[i] = cidr.IP[i] | ^cidr.Mask[i]
}
if !rc.clusterCIDR.Contains(cidr.IP) || !rc.clusterCIDR.Contains(lastIP) {
return false
}
return true
}

View File

@@ -1,208 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package routecontroller
import (
"net"
"testing"
"time"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
"github.com/GoogleCloudPlatform/kubernetes/pkg/cloudprovider"
"github.com/GoogleCloudPlatform/kubernetes/pkg/cloudprovider/fake"
)
func TestIsResponsibleForRoute(t *testing.T) {
myClusterName := "my-awesome-cluster"
myClusterRoute := "my-awesome-cluster-12345678-90ab-cdef-1234-567890abcdef"
testCases := []struct {
clusterCIDR string
routeName string
routeCIDR string
expectedResponsible bool
}{
// Routes that belong to this cluster
{"10.244.0.0/16", myClusterRoute, "10.244.0.0/24", true},
{"10.244.0.0/16", myClusterRoute, "10.244.10.0/24", true},
{"10.244.0.0/16", myClusterRoute, "10.244.255.0/24", true},
{"10.244.0.0/14", myClusterRoute, "10.244.0.0/24", true},
{"10.244.0.0/14", myClusterRoute, "10.247.255.0/24", true},
// Routes that match our naming/tagging scheme, but are outside our cidr
{"10.244.0.0/16", myClusterRoute, "10.224.0.0/24", false},
{"10.244.0.0/16", myClusterRoute, "10.0.10.0/24", false},
{"10.244.0.0/16", myClusterRoute, "10.255.255.0/24", false},
{"10.244.0.0/14", myClusterRoute, "10.248.0.0/24", false},
{"10.244.0.0/14", myClusterRoute, "10.243.255.0/24", false},
}
for i, testCase := range testCases {
_, cidr, err := net.ParseCIDR(testCase.clusterCIDR)
if err != nil {
t.Errorf("%d. Error in test case: unparsable cidr %q", i, testCase.clusterCIDR)
}
rc := New(nil, nil, myClusterName, cidr)
route := &cloudprovider.Route{
Name: testCase.routeName,
TargetInstance: "doesnt-matter-for-this-test",
DestinationCIDR: testCase.routeCIDR,
}
if resp := rc.isResponsibleForRoute(route); resp != testCase.expectedResponsible {
t.Errorf("%d. isResponsibleForRoute() = %t; want %t", i, resp, testCase.expectedResponsible)
}
}
}
func TestReconcile(t *testing.T) {
cluster := "my-k8s"
testCases := []struct {
nodes []api.Node
initialRoutes []*cloudprovider.Route
expectedRoutes []*cloudprovider.Route
}{
// 2 nodes, routes already there
{
nodes: []api.Node{
{ObjectMeta: api.ObjectMeta{Name: "node-1", UID: "01"}, Spec: api.NodeSpec{PodCIDR: "10.120.0.0/24"}},
{ObjectMeta: api.ObjectMeta{Name: "node-2", UID: "02"}, Spec: api.NodeSpec{PodCIDR: "10.120.1.0/24"}},
},
initialRoutes: []*cloudprovider.Route{
{cluster + "-01", "node-1", "10.120.0.0/24"},
{cluster + "-02", "node-2", "10.120.1.0/24"},
},
expectedRoutes: []*cloudprovider.Route{
{cluster + "-01", "node-1", "10.120.0.0/24"},
{cluster + "-02", "node-2", "10.120.1.0/24"},
},
},
// 2 nodes, one route already there
{
nodes: []api.Node{
{ObjectMeta: api.ObjectMeta{Name: "node-1", UID: "01"}, Spec: api.NodeSpec{PodCIDR: "10.120.0.0/24"}},
{ObjectMeta: api.ObjectMeta{Name: "node-2", UID: "02"}, Spec: api.NodeSpec{PodCIDR: "10.120.1.0/24"}},
},
initialRoutes: []*cloudprovider.Route{
{cluster + "-01", "node-1", "10.120.0.0/24"},
},
expectedRoutes: []*cloudprovider.Route{
{cluster + "-01", "node-1", "10.120.0.0/24"},
{cluster + "-02", "node-2", "10.120.1.0/24"},
},
},
// 2 nodes, no routes yet
{
nodes: []api.Node{
{ObjectMeta: api.ObjectMeta{Name: "node-1", UID: "01"}, Spec: api.NodeSpec{PodCIDR: "10.120.0.0/24"}},
{ObjectMeta: api.ObjectMeta{Name: "node-2", UID: "02"}, Spec: api.NodeSpec{PodCIDR: "10.120.1.0/24"}},
},
initialRoutes: []*cloudprovider.Route{},
expectedRoutes: []*cloudprovider.Route{
{cluster + "-01", "node-1", "10.120.0.0/24"},
{cluster + "-02", "node-2", "10.120.1.0/24"},
},
},
// 2 nodes, a few too many routes
{
nodes: []api.Node{
{ObjectMeta: api.ObjectMeta{Name: "node-1", UID: "01"}, Spec: api.NodeSpec{PodCIDR: "10.120.0.0/24"}},
{ObjectMeta: api.ObjectMeta{Name: "node-2", UID: "02"}, Spec: api.NodeSpec{PodCIDR: "10.120.1.0/24"}},
},
initialRoutes: []*cloudprovider.Route{
{cluster + "-01", "node-1", "10.120.0.0/24"},
{cluster + "-02", "node-2", "10.120.1.0/24"},
{cluster + "-03", "node-3", "10.120.2.0/24"},
{cluster + "-04", "node-4", "10.120.3.0/24"},
},
expectedRoutes: []*cloudprovider.Route{
{cluster + "-01", "node-1", "10.120.0.0/24"},
{cluster + "-02", "node-2", "10.120.1.0/24"},
},
},
// 2 nodes, 2 routes, but only 1 is right
{
nodes: []api.Node{
{ObjectMeta: api.ObjectMeta{Name: "node-1", UID: "01"}, Spec: api.NodeSpec{PodCIDR: "10.120.0.0/24"}},
{ObjectMeta: api.ObjectMeta{Name: "node-2", UID: "02"}, Spec: api.NodeSpec{PodCIDR: "10.120.1.0/24"}},
},
initialRoutes: []*cloudprovider.Route{
{cluster + "-01", "node-1", "10.120.0.0/24"},
{cluster + "-03", "node-3", "10.120.2.0/24"},
},
expectedRoutes: []*cloudprovider.Route{
{cluster + "-01", "node-1", "10.120.0.0/24"},
{cluster + "-02", "node-2", "10.120.1.0/24"},
},
},
}
for i, testCase := range testCases {
cloud := &fake_cloud.FakeCloud{RouteMap: make(map[string]*fake_cloud.FakeRoute)}
for _, route := range testCase.initialRoutes {
fakeRoute := &fake_cloud.FakeRoute{}
fakeRoute.ClusterName = cluster
fakeRoute.Route = *route
cloud.RouteMap[route.Name] = fakeRoute
}
routes, ok := cloud.Routes()
if !ok {
t.Error("Error in test: fake_cloud doesn't support Routes()")
}
_, cidr, _ := net.ParseCIDR("10.120.0.0/16")
rc := New(routes, nil, cluster, cidr)
if err := rc.reconcile(testCase.nodes, testCase.initialRoutes); err != nil {
t.Errorf("%d. Error from rc.reconcile(): %v", i, err)
}
var finalRoutes []*cloudprovider.Route
var err error
timeoutChan := time.After(50 * time.Millisecond)
tick := time.NewTicker(10 * time.Millisecond)
defer tick.Stop()
poll:
for {
select {
case <-tick.C:
if finalRoutes, err = routes.ListRoutes(cluster); err == nil && routeListEqual(finalRoutes, testCase.expectedRoutes) {
break poll
}
case <-timeoutChan:
t.Errorf("%d. rc.reconcile() = %v, routes:\n%v\nexpected: nil, routes:\n%v\n", i, err, flatten(finalRoutes), flatten(testCase.expectedRoutes))
break poll
}
}
}
}
func routeListEqual(list1, list2 []*cloudprovider.Route) bool {
if len(list1) != len(list2) {
return false
}
routeMap1 := make(map[string]*cloudprovider.Route)
for _, route1 := range list1 {
routeMap1[route1.Name] = route1
}
for _, route2 := range list2 {
if route1, exists := routeMap1[route2.Name]; !exists || *route1 != *route2 {
return false
}
}
return true
}
func flatten(list []*cloudprovider.Route) []cloudprovider.Route {
var structList []cloudprovider.Route
for _, route := range list {
structList = append(structList, *route)
}
return structList
}

View File

@@ -1,19 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package servicecontroller contains code for syncing cloud load balancers
// with the service registry.
package servicecontroller

View File

@@ -1,671 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package servicecontroller
import (
"fmt"
"net"
"sort"
"sync"
"time"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api/errors"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client/cache"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client/record"
"github.com/GoogleCloudPlatform/kubernetes/pkg/cloudprovider"
"github.com/GoogleCloudPlatform/kubernetes/pkg/fields"
"github.com/GoogleCloudPlatform/kubernetes/pkg/types"
"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
"github.com/golang/glog"
)
const (
workerGoroutines = 10
// How long to wait before retrying the processing of a service change.
// If this changes, the sleep in hack/jenkins/e2e.sh before downing a cluster
// should be changed appropriately.
processingRetryInterval = 5 * time.Second
clientRetryCount = 5
clientRetryInterval = 5 * time.Second
retryable = true
notRetryable = false
)
type cachedService struct {
// The last-known state of the service
lastState *api.Service
// The state as successfully applied to the load balancer
appliedState *api.Service
// Ensures only one goroutine can operate on this service at any given time.
mu sync.Mutex
}
type serviceCache struct {
mu sync.Mutex // protects serviceMap
serviceMap map[string]*cachedService
}
type ServiceController struct {
cloud cloudprovider.Interface
kubeClient client.Interface
clusterName string
balancer cloudprovider.TCPLoadBalancer
zone cloudprovider.Zone
cache *serviceCache
eventBroadcaster record.EventBroadcaster
eventRecorder record.EventRecorder
nodeLister cache.StoreToNodeLister
}
// New returns a new service controller to keep cloud provider service resources
// (like external load balancers) in sync with the registry.
func New(cloud cloudprovider.Interface, kubeClient client.Interface, clusterName string) *ServiceController {
broadcaster := record.NewBroadcaster()
broadcaster.StartRecordingToSink(kubeClient.Events(""))
recorder := broadcaster.NewRecorder(api.EventSource{Component: "service-controller"})
return &ServiceController{
cloud: cloud,
kubeClient: kubeClient,
clusterName: clusterName,
cache: &serviceCache{serviceMap: make(map[string]*cachedService)},
eventBroadcaster: broadcaster,
eventRecorder: recorder,
nodeLister: cache.StoreToNodeLister{
Store: cache.NewStore(cache.MetaNamespaceKeyFunc),
},
}
}
// Run starts a background goroutine that watches for changes to services that
// have (or had) externalLoadBalancers=true and ensures that they have external
// load balancers created and deleted appropriately.
// serviceSyncPeriod controls how often we check the cluster's services to
// ensure that the correct external load balancers exist.
// nodeSyncPeriod controls how often we check the cluster's nodes to determine
// if external load balancers need to be updated to point to a new set.
//
// It's an error to call Run() more than once for a given ServiceController
// object.
func (s *ServiceController) Run(serviceSyncPeriod, nodeSyncPeriod time.Duration) error {
if err := s.init(); err != nil {
return err
}
// We have to make this check beecause the ListWatch that we use in
// WatchServices requires Client functions that aren't in the interface
// for some reason.
if _, ok := s.kubeClient.(*client.Client); !ok {
return fmt.Errorf("ServiceController only works with real Client objects, but was passed something else satisfying the client Interface.")
}
// Get the currently existing set of services and then all future creates
// and updates of services.
// A delta compressor is needed for the DeltaFIFO queue because we only ever
// care about the most recent state.
serviceQueue := cache.NewDeltaFIFO(
cache.MetaNamespaceKeyFunc,
cache.DeltaCompressorFunc(func(d cache.Deltas) cache.Deltas {
if len(d) == 0 {
return d
}
return cache.Deltas{*d.Newest()}
}),
s.cache,
)
lw := cache.NewListWatchFromClient(s.kubeClient.(*client.Client), "services", api.NamespaceAll, fields.Everything())
cache.NewReflector(lw, &api.Service{}, serviceQueue, serviceSyncPeriod).Run()
for i := 0; i < workerGoroutines; i++ {
go s.watchServices(serviceQueue)
}
nodeLW := cache.NewListWatchFromClient(s.kubeClient.(*client.Client), "nodes", api.NamespaceAll, fields.Everything())
cache.NewReflector(nodeLW, &api.Node{}, s.nodeLister.Store, 0).Run()
go s.nodeSyncLoop(nodeSyncPeriod)
return nil
}
func (s *ServiceController) init() error {
if s.cloud == nil {
return fmt.Errorf("ServiceController should not be run without a cloudprovider.")
}
balancer, ok := s.cloud.TCPLoadBalancer()
if !ok {
return fmt.Errorf("the cloud provider does not support external TCP load balancers.")
}
s.balancer = balancer
zones, ok := s.cloud.Zones()
if !ok {
return fmt.Errorf("the cloud provider does not support zone enumeration, which is required for creating external load balancers.")
}
zone, err := zones.GetZone()
if err != nil {
return fmt.Errorf("failed to get zone from cloud provider, will not be able to create external load balancers: %v", err)
}
s.zone = zone
return nil
}
// Loop infinitely, processing all service updates provided by the queue.
func (s *ServiceController) watchServices(serviceQueue *cache.DeltaFIFO) {
for {
newItem := serviceQueue.Pop()
deltas, ok := newItem.(cache.Deltas)
if !ok {
glog.Errorf("Received object from service watcher that wasn't Deltas: %+v", newItem)
}
delta := deltas.Newest()
if delta == nil {
glog.Errorf("Received nil delta from watcher queue.")
continue
}
err, shouldRetry := s.processDelta(delta)
if shouldRetry {
// Add the failed service back to the queue so we'll retry it.
glog.Errorf("Failed to process service delta. Retrying: %v", err)
time.Sleep(processingRetryInterval)
serviceQueue.AddIfNotPresent(deltas)
} else if err != nil {
util.HandleError(fmt.Errorf("Failed to process service delta. Not retrying: %v", err))
}
}
}
// Returns an error if processing the delta failed, along with a boolean
// indicator of whether the processing should be retried.
func (s *ServiceController) processDelta(delta *cache.Delta) (error, bool) {
service, ok := delta.Object.(*api.Service)
var namespacedName types.NamespacedName
var cachedService *cachedService
if !ok {
// If the DeltaFIFO saw a key in our cache that it didn't know about, it
// can send a deletion with an unknown state. Grab the service from our
// cache for deleting.
key, ok := delta.Object.(cache.DeletedFinalStateUnknown)
if !ok {
return fmt.Errorf("Delta contained object that wasn't a service or a deleted key: %+v", delta), notRetryable
}
cachedService, ok = s.cache.get(key.Key)
if !ok {
return fmt.Errorf("Service %s not in cache even though the watcher thought it was. Ignoring the deletion.", key), notRetryable
}
service = cachedService.lastState
delta.Object = cachedService.lastState
namespacedName = types.NamespacedName{service.Namespace, service.Name}
} else {
namespacedName.Namespace = service.Namespace
namespacedName.Name = service.Name
cachedService = s.cache.getOrCreate(namespacedName.String())
}
glog.V(2).Infof("Got new %s delta for service: %+v", delta.Type, service)
// Ensure that no other goroutine will interfere with our processing of the
// service.
cachedService.mu.Lock()
defer cachedService.mu.Unlock()
// Update the cached service (used above for populating synthetic deletes)
cachedService.lastState = service
// TODO: Handle added, updated, and sync differently?
switch delta.Type {
case cache.Added:
fallthrough
case cache.Updated:
fallthrough
case cache.Sync:
err, retry := s.createLoadBalancerIfNeeded(namespacedName, service, cachedService.appliedState)
if err != nil {
s.eventRecorder.Event(service, "creating loadbalancer failed", err.Error())
return err, retry
}
// Always update the cache upon success.
// NOTE: Since we update the cached service if and only if we successully
// processed it, a cached service being nil implies that it hasn't yet
// been successfully processed.
cachedService.appliedState = service
s.cache.set(namespacedName.String(), cachedService)
case cache.Deleted:
err := s.balancer.EnsureTCPLoadBalancerDeleted(s.loadBalancerName(service), s.zone.Region)
if err != nil {
s.eventRecorder.Event(service, "deleting loadbalancer failed", err.Error())
return err, retryable
}
s.cache.delete(namespacedName.String())
default:
glog.Errorf("Unexpected delta type: %v", delta.Type)
}
return nil, notRetryable
}
// Returns whatever error occurred along with a boolean indicator of whether it
// should be retried.
func (s *ServiceController) createLoadBalancerIfNeeded(namespacedName types.NamespacedName, service, cachedService *api.Service) (error, bool) {
if cachedService != nil && !needsUpdate(cachedService, service) {
glog.Infof("LB already exists and doesn't need update for service %s", namespacedName)
return nil, notRetryable
}
if cachedService != nil {
// If the service already exists but needs to be updated, delete it so that
// we can recreate it cleanly.
if wantsExternalLoadBalancer(cachedService) {
glog.Infof("Deleting existing load balancer for service %s that needs an updated load balancer.", namespacedName)
if err := s.balancer.EnsureTCPLoadBalancerDeleted(s.loadBalancerName(cachedService), s.zone.Region); err != nil {
return err, retryable
}
}
} else {
// If we don't have any cached memory of the load balancer, we have to ask
// the cloud provider for what it knows about it.
status, exists, err := s.balancer.GetTCPLoadBalancer(s.loadBalancerName(service), s.zone.Region)
if err != nil {
return fmt.Errorf("Error getting LB for service %s: %v", namespacedName, err), retryable
}
if exists && api.LoadBalancerStatusEqual(status, &service.Status.LoadBalancer) {
glog.Infof("LB already exists with status %s for previously uncached service %s", status, namespacedName)
return nil, notRetryable
} else if exists {
glog.Infof("Deleting old LB for previously uncached service %s whose endpoint %s doesn't match the service's desired IPs %v",
namespacedName, status, service.Spec.DeprecatedPublicIPs)
if err := s.balancer.EnsureTCPLoadBalancerDeleted(s.loadBalancerName(service), s.zone.Region); err != nil {
return err, retryable
}
}
}
// Save the state so we can avoid a write if it doesn't change
previousState := api.LoadBalancerStatusDeepCopy(&service.Status.LoadBalancer)
if !wantsExternalLoadBalancer(service) {
glog.Infof("Not creating LB for service %s that doesn't want one.", namespacedName)
service.Status.LoadBalancer = api.LoadBalancerStatus{}
} else {
glog.V(2).Infof("Creating LB for service %s", namespacedName)
// The load balancer doesn't exist yet, so create it.
err := s.createExternalLoadBalancer(service)
if err != nil {
return fmt.Errorf("failed to create external load balancer for service %s: %v", namespacedName, err), retryable
}
}
// Write the state if changed
// TODO: Be careful here ... what if there were other changes to the service?
if !api.LoadBalancerStatusEqual(previousState, &service.Status.LoadBalancer) {
if err := s.persistUpdate(service); err != nil {
return fmt.Errorf("Failed to persist updated status to apiserver, even after retries. Giving up: %v", err), notRetryable
}
} else {
glog.Infof("Not persisting unchanged LoadBalancerStatus to registry.")
}
return nil, notRetryable
}
func (s *ServiceController) persistUpdate(service *api.Service) error {
var err error
for i := 0; i < clientRetryCount; i++ {
_, err = s.kubeClient.Services(service.Namespace).Update(service)
if err == nil {
return nil
}
// If the object no longer exists, we don't want to recreate it. Just bail
// out so that we can process the delete, which we should soon be receiving
// if we haven't already.
if errors.IsNotFound(err) {
glog.Infof("Not persisting update to service that no longer exists: %v", err)
return nil
}
// TODO: Try to resolve the conflict if the change was unrelated to load
// balancer status. For now, just rely on the fact that we'll
// also process the update that caused the resource version to change.
if errors.IsConflict(err) {
glog.Infof("Not persisting update to service that has been changed since we received it: %v", err)
return nil
}
glog.Warningf("Failed to persist updated LoadBalancerStatus to service %s after creating its external load balancer: %v",
service.Name, err)
time.Sleep(clientRetryInterval)
}
return err
}
func (s *ServiceController) createExternalLoadBalancer(service *api.Service) error {
ports, err := getPortsForLB(service)
if err != nil {
return err
}
nodes, err := s.nodeLister.List()
if err != nil {
return err
}
name := s.loadBalancerName(service)
if len(service.Spec.DeprecatedPublicIPs) > 0 {
for _, publicIP := range service.Spec.DeprecatedPublicIPs {
// TODO: Make this actually work for multiple IPs by using different
// names for each. For now, we'll just create the first and break.
status, err := s.balancer.CreateTCPLoadBalancer(name, s.zone.Region, net.ParseIP(publicIP),
ports, hostsFromNodeList(&nodes), service.Spec.SessionAffinity)
if err != nil {
return err
} else {
service.Status.LoadBalancer = *status
}
break
}
} else {
status, err := s.balancer.CreateTCPLoadBalancer(name, s.zone.Region, nil,
ports, hostsFromNodeList(&nodes), service.Spec.SessionAffinity)
if err != nil {
return err
} else {
service.Status.LoadBalancer = *status
}
}
return nil
}
// ListKeys implements the interface required by DeltaFIFO to list the keys we
// already know about.
func (s *serviceCache) ListKeys() []string {
s.mu.Lock()
defer s.mu.Unlock()
keys := make([]string, 0, len(s.serviceMap))
for k := range s.serviceMap {
keys = append(keys, k)
}
return keys
}
// GetByKey returns the value stored in the serviceMap under the given key
func (s *serviceCache) GetByKey(key string) (interface{}, bool, error) {
s.mu.Lock()
defer s.mu.Unlock()
if v, ok := s.serviceMap[key]; ok {
return v, true, nil
}
return nil, false, nil
}
// ListKeys implements the interface required by DeltaFIFO to list the keys we
// already know about.
func (s *serviceCache) allServices() []*cachedService {
s.mu.Lock()
defer s.mu.Unlock()
services := make([]*cachedService, 0, len(s.serviceMap))
for _, v := range s.serviceMap {
services = append(services, v)
}
return services
}
func (s *serviceCache) get(serviceName string) (*cachedService, bool) {
s.mu.Lock()
defer s.mu.Unlock()
service, ok := s.serviceMap[serviceName]
return service, ok
}
func (s *serviceCache) getOrCreate(serviceName string) *cachedService {
s.mu.Lock()
defer s.mu.Unlock()
service, ok := s.serviceMap[serviceName]
if !ok {
service = &cachedService{}
s.serviceMap[serviceName] = service
}
return service
}
func (s *serviceCache) set(serviceName string, service *cachedService) {
s.mu.Lock()
defer s.mu.Unlock()
s.serviceMap[serviceName] = service
}
func (s *serviceCache) delete(serviceName string) {
s.mu.Lock()
defer s.mu.Unlock()
delete(s.serviceMap, serviceName)
}
func needsUpdate(oldService *api.Service, newService *api.Service) bool {
if !wantsExternalLoadBalancer(oldService) && !wantsExternalLoadBalancer(newService) {
return false
}
if wantsExternalLoadBalancer(oldService) != wantsExternalLoadBalancer(newService) {
return true
}
if !portsEqualForLB(oldService, newService) || oldService.Spec.SessionAffinity != newService.Spec.SessionAffinity {
return true
}
if len(oldService.Spec.DeprecatedPublicIPs) != len(newService.Spec.DeprecatedPublicIPs) {
return true
}
for i := range oldService.Spec.DeprecatedPublicIPs {
if oldService.Spec.DeprecatedPublicIPs[i] != newService.Spec.DeprecatedPublicIPs[i] {
return true
}
}
return false
}
func (s *ServiceController) loadBalancerName(service *api.Service) string {
return cloudprovider.GetLoadBalancerName(service)
}
func getPortsForLB(service *api.Service) ([]*api.ServicePort, error) {
ports := []*api.ServicePort{}
for i := range service.Spec.Ports {
// TODO: Support UDP. Remove the check from the API validation package once
// it's supported.
sp := &service.Spec.Ports[i]
if sp.Protocol != api.ProtocolTCP {
return nil, fmt.Errorf("external load balancers for non TCP services are not currently supported.")
}
ports = append(ports, sp)
}
return ports, nil
}
func portsEqualForLB(x, y *api.Service) bool {
xPorts, err := getPortsForLB(x)
if err != nil {
return false
}
yPorts, err := getPortsForLB(y)
if err != nil {
return false
}
return portSlicesEqualForLB(xPorts, yPorts)
}
func portSlicesEqualForLB(x, y []*api.ServicePort) bool {
if len(x) != len(y) {
return false
}
for i := range x {
if !portEqualForLB(x[i], y[i]) {
return false
}
}
return true
}
func portEqualForLB(x, y *api.ServicePort) bool {
// TODO: Should we check name? (In theory, an LB could expose it)
if x.Name != y.Name {
return false
}
if x.Protocol != y.Protocol {
return false
}
if x.Port != y.Port {
return false
}
if x.NodePort != y.NodePort {
return false
}
// We don't check TargetPort; that is not relevant for load balancing
// TODO: Should we blank it out? Or just check it anyway?
return true
}
func intSlicesEqual(x, y []int) bool {
if len(x) != len(y) {
return false
}
if !sort.IntsAreSorted(x) {
sort.Ints(x)
}
if !sort.IntsAreSorted(y) {
sort.Ints(y)
}
for i := range x {
if x[i] != y[i] {
return false
}
}
return true
}
func stringSlicesEqual(x, y []string) bool {
if len(x) != len(y) {
return false
}
if !sort.StringsAreSorted(x) {
sort.Strings(x)
}
if !sort.StringsAreSorted(y) {
sort.Strings(y)
}
for i := range x {
if x[i] != y[i] {
return false
}
}
return true
}
func hostsFromNodeList(list *api.NodeList) []string {
result := make([]string, len(list.Items))
for ix := range list.Items {
result[ix] = list.Items[ix].Name
}
return result
}
// nodeSyncLoop handles updating the hosts pointed to by all external load
// balancers whenever the set of nodes in the cluster changes.
func (s *ServiceController) nodeSyncLoop(period time.Duration) {
var prevHosts []string
var servicesToUpdate []*cachedService
// TODO: Eliminate the unneeded now variable once we stop compiling in go1.3.
// It's needed at the moment because go1.3 requires ranges to be assigned to
// something to compile, and gofmt1.4 complains about using `_ = range`.
for now := range time.Tick(period) {
_ = now
nodes, err := s.nodeLister.List()
if err != nil {
glog.Errorf("Failed to retrieve current set of nodes from node lister: %v", err)
continue
}
newHosts := hostsFromNodeList(&nodes)
if stringSlicesEqual(newHosts, prevHosts) {
// The set of nodes in the cluster hasn't changed, but we can retry
// updating any services that we failed to update last time around.
servicesToUpdate = s.updateLoadBalancerHosts(servicesToUpdate, newHosts)
continue
}
glog.Infof("Detected change in list of current cluster nodes. New node set: %v", newHosts)
// Try updating all services, and save the ones that fail to try again next
// round.
servicesToUpdate = s.cache.allServices()
numServices := len(servicesToUpdate)
servicesToUpdate = s.updateLoadBalancerHosts(servicesToUpdate, newHosts)
glog.Infof("Successfully updated %d out of %d external load balancers to direct traffic to the updated set of nodes",
numServices-len(servicesToUpdate), numServices)
prevHosts = newHosts
}
}
// updateLoadBalancerHosts updates all existing external load balancers so that
// they will match the list of hosts provided.
// Returns the list of services that couldn't be updated.
func (s *ServiceController) updateLoadBalancerHosts(services []*cachedService, hosts []string) (servicesToRetry []*cachedService) {
for _, service := range services {
func() {
service.mu.Lock()
defer service.mu.Unlock()
// If the service is nil, that means it hasn't yet been successfully dealt
// with by the load balancer reconciler. We can trust the load balancer
// reconciler to ensure the service's load balancer is created to target
// the correct nodes.
if service.appliedState == nil {
return
}
if err := s.lockedUpdateLoadBalancerHosts(service.appliedState, hosts); err != nil {
glog.Errorf("External error while updating TCP load balancer: %v.", err)
servicesToRetry = append(servicesToRetry, service)
}
}()
}
return servicesToRetry
}
// Updates the external load balancer of a service, assuming we hold the mutex
// associated with the service.
func (s *ServiceController) lockedUpdateLoadBalancerHosts(service *api.Service, hosts []string) error {
if !wantsExternalLoadBalancer(service) {
return nil
}
name := cloudprovider.GetLoadBalancerName(service)
err := s.balancer.UpdateTCPLoadBalancer(name, s.zone.Region, hosts)
if err == nil {
return nil
}
// It's only an actual error if the load balancer still exists.
if _, exists, err := s.balancer.GetTCPLoadBalancer(name, s.zone.Region); err != nil {
glog.Errorf("External error while checking if TCP load balancer %q exists: name, %v")
} else if !exists {
return nil
}
return err
}
func wantsExternalLoadBalancer(service *api.Service) bool {
return service.Spec.Type == api.ServiceTypeLoadBalancer
}

View File

@@ -1,221 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package servicecontroller
import (
"reflect"
"testing"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client/testclient"
fake_cloud "github.com/GoogleCloudPlatform/kubernetes/pkg/cloudprovider/fake"
"github.com/GoogleCloudPlatform/kubernetes/pkg/types"
)
const region = "us-central"
func newService(name string, uid types.UID, serviceType api.ServiceType) *api.Service {
return &api.Service{ObjectMeta: api.ObjectMeta{Name: name, Namespace: "namespace", UID: uid}, Spec: api.ServiceSpec{Type: serviceType}}
}
func TestCreateExternalLoadBalancer(t *testing.T) {
table := []struct {
service *api.Service
expectErr bool
expectCreateAttempt bool
}{
{
service: &api.Service{
ObjectMeta: api.ObjectMeta{
Name: "no-external-balancer",
Namespace: "default",
},
Spec: api.ServiceSpec{
Type: api.ServiceTypeClusterIP,
},
},
expectErr: false,
expectCreateAttempt: false,
},
{
service: &api.Service{
ObjectMeta: api.ObjectMeta{
Name: "udp-service",
Namespace: "default",
},
Spec: api.ServiceSpec{
Ports: []api.ServicePort{{
Port: 80,
Protocol: api.ProtocolUDP,
}},
Type: api.ServiceTypeLoadBalancer,
},
},
expectErr: true,
expectCreateAttempt: false,
},
{
service: &api.Service{
ObjectMeta: api.ObjectMeta{
Name: "basic-service1",
Namespace: "default",
},
Spec: api.ServiceSpec{
Ports: []api.ServicePort{{
Port: 80,
Protocol: api.ProtocolTCP,
}},
Type: api.ServiceTypeLoadBalancer,
},
},
expectErr: false,
expectCreateAttempt: true,
},
}
for _, item := range table {
cloud := &fake_cloud.FakeCloud{}
cloud.Region = region
client := &testclient.Fake{}
controller := New(cloud, client, "test-cluster")
controller.init()
cloud.Calls = nil // ignore any cloud calls made in init()
client.ClearActions() // ignore any client calls made in init()
err, _ := controller.createLoadBalancerIfNeeded(types.NamespacedName{"foo", "bar"}, item.service, nil)
if !item.expectErr && err != nil {
t.Errorf("unexpected error: %v", err)
} else if item.expectErr && err == nil {
t.Errorf("expected error creating %v, got nil", item.service)
}
actions := client.Actions()
if !item.expectCreateAttempt {
if len(cloud.Calls) > 0 {
t.Errorf("unexpected cloud provider calls: %v", cloud.Calls)
}
if len(actions) > 0 {
t.Errorf("unexpected client actions: %v", actions)
}
} else {
if len(cloud.Balancers) != 1 {
t.Errorf("expected one load balancer to be created, got %v", cloud.Balancers)
} else if cloud.Balancers[0].Name != controller.loadBalancerName(item.service) ||
cloud.Balancers[0].Region != region ||
cloud.Balancers[0].Ports[0].Port != item.service.Spec.Ports[0].Port {
t.Errorf("created load balancer has incorrect parameters: %v", cloud.Balancers[0])
}
actionFound := false
for _, action := range actions {
if action.Action == "update-service" {
actionFound = true
}
}
if !actionFound {
t.Errorf("expected updated service to be sent to client, got these actions instead: %v", actions)
}
}
}
}
// TODO: Finish converting and update comments
func TestUpdateNodesInExternalLoadBalancer(t *testing.T) {
hosts := []string{"node0", "node1", "node73"}
table := []struct {
services []*api.Service
expectedUpdateCalls []fake_cloud.FakeUpdateBalancerCall
}{
{
// No services present: no calls should be made.
services: []*api.Service{},
expectedUpdateCalls: nil,
},
{
// Services do not have external load balancers: no calls should be made.
services: []*api.Service{
newService("s0", "111", api.ServiceTypeClusterIP),
newService("s1", "222", api.ServiceTypeNodePort),
},
expectedUpdateCalls: nil,
},
{
// Services does have an external load balancer: one call should be made.
services: []*api.Service{
newService("s0", "333", api.ServiceTypeLoadBalancer),
},
expectedUpdateCalls: []fake_cloud.FakeUpdateBalancerCall{
{Name: "a333", Region: region, Hosts: []string{"node0", "node1", "node73"}},
},
},
{
// Three services have an external load balancer: three calls.
services: []*api.Service{
newService("s0", "444", api.ServiceTypeLoadBalancer),
newService("s1", "555", api.ServiceTypeLoadBalancer),
newService("s2", "666", api.ServiceTypeLoadBalancer),
},
expectedUpdateCalls: []fake_cloud.FakeUpdateBalancerCall{
{Name: "a444", Region: region, Hosts: []string{"node0", "node1", "node73"}},
{Name: "a555", Region: region, Hosts: []string{"node0", "node1", "node73"}},
{Name: "a666", Region: region, Hosts: []string{"node0", "node1", "node73"}},
},
},
{
// Two services have an external load balancer and two don't: two calls.
services: []*api.Service{
newService("s0", "777", api.ServiceTypeNodePort),
newService("s1", "888", api.ServiceTypeLoadBalancer),
newService("s3", "999", api.ServiceTypeLoadBalancer),
newService("s4", "123", api.ServiceTypeClusterIP),
},
expectedUpdateCalls: []fake_cloud.FakeUpdateBalancerCall{
{Name: "a888", Region: region, Hosts: []string{"node0", "node1", "node73"}},
{Name: "a999", Region: region, Hosts: []string{"node0", "node1", "node73"}},
},
},
{
// One service has an external load balancer and one is nil: one call.
services: []*api.Service{
newService("s0", "234", api.ServiceTypeLoadBalancer),
nil,
},
expectedUpdateCalls: []fake_cloud.FakeUpdateBalancerCall{
{Name: "a234", Region: region, Hosts: []string{"node0", "node1", "node73"}},
},
},
}
for _, item := range table {
cloud := &fake_cloud.FakeCloud{}
cloud.Region = region
client := &testclient.Fake{}
controller := New(cloud, client, "test-cluster2")
controller.init()
cloud.Calls = nil // ignore any cloud calls made in init()
var services []*cachedService
for _, service := range item.services {
services = append(services, &cachedService{lastState: service, appliedState: service})
}
if err := controller.updateLoadBalancerHosts(services, hosts); err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(item.expectedUpdateCalls, cloud.UpdateCalls) {
t.Errorf("expected update calls mismatch, expected %+v, got %+v", item.expectedUpdateCalls, cloud.UpdateCalls)
}
}
}
// TODO(a-robinson): Add tests for update/sync/delete.