Revert "add node shutdown taint"

Author: Aleksandra Malinowska
Date: 2018-02-16 12:24:27 +01:00
Committed by: GitHub
Parent: fc45081784
Commit: 2d54ba3e0f
19 changed files with 13 additions and 393 deletions


@@ -79,11 +79,6 @@ var (
 		Effect: v1.TaintEffectNoExecute,
 	}
-	shutDownTaint = &v1.Taint{
-		Key:    algorithm.TaintNodeShutdown,
-		Effect: v1.TaintEffectNoSchedule,
-	}
-
 	nodeConditionToTaintKeyMap = map[v1.NodeConditionType]string{
 		v1.NodeMemoryPressure: algorithm.TaintNodeMemoryPressure,
 		v1.NodeOutOfDisk:      algorithm.TaintNodeOutOfDisk,
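
A note on this first hunk: the reverted shutDownTaint used the NoSchedule effect, which stops new pods from scheduling onto a shut-down node without evicting the pods already running there. As a hypothetical illustration (not code from this commit), a pod that needed to land on such a node anyway would have carried a toleration like the sketch below; the key string is the literal asserted by TestCloudProviderNodeShutdown further down.

    package main

    import (
        "fmt"

        v1 "k8s.io/api/core/v1"
    )

    func main() {
        // Operator Exists matches the taint regardless of its value; the
        // effect must mirror the taint's NoSchedule effect to neutralize it.
        tol := v1.Toleration{
            Key:      "node.cloudprovider.kubernetes.io/shutdown",
            Operator: v1.TolerationOpExists,
            Effect:   v1.TaintEffectNoSchedule,
        }
        fmt.Printf("toleration: %+v\n", tol)
    }
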
@@ -156,10 +151,9 @@ type Controller struct {
 	daemonSetStore          extensionslisters.DaemonSetLister
 	daemonSetInformerSynced cache.InformerSynced
 
-	nodeLister                  corelisters.NodeLister
-	nodeInformerSynced          cache.InformerSynced
-	nodeExistsInCloudProvider   func(types.NodeName) (bool, error)
-	nodeShutdownInCloudProvider func(types.NodeName) (bool, error)
+	nodeLister                corelisters.NodeLister
+	nodeInformerSynced        cache.InformerSynced
+	nodeExistsInCloudProvider func(types.NodeName) (bool, error)
 
 	recorder record.EventRecorder

(nodeLister, nodeInformerSynced, and nodeExistsInCloudProvider are unchanged except for whitespace: dropping the long nodeShutdownInCloudProvider field narrows gofmt's column alignment for the remaining fields, which is why they appear as both removed and re-added.)
@@ -245,9 +239,6 @@ func NewNodeLifecycleController(podInformer coreinformers.PodInformer,
 		nodeExistsInCloudProvider: func(nodeName types.NodeName) (bool, error) {
 			return nodeutil.ExistsInCloudProvider(cloud, nodeName)
 		},
-		nodeShutdownInCloudProvider: func(nodeName types.NodeName) (bool, error) {
-			return nodeutil.ShutdownInCloudProvider(cloud, nodeName)
-		},
 		recorder:               recorder,
 		nodeMonitorPeriod:      nodeMonitorPeriod,
 		nodeStartupGracePeriod: nodeStartupGracePeriod,
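
The constructor wiring removed here follows the controller's pattern of exposing cloud lookups as function-typed fields, which is what lets the tests further down swap in canned answers without a real cloud provider. A minimal, self-contained sketch of that pattern, with hypothetical names:

    package main

    import "fmt"

    // lifecycle carries a cloud lookup as a function field, a simplified,
    // hypothetical version of the controller's injectable dependencies.
    type lifecycle struct {
        nodeExists func(name string) (bool, error)
    }

    func main() {
        // A test substitutes a canned answer, exactly as the test hunks
        // below do for nodeExistsInCloudProvider and (formerly)
        // nodeShutdownInCloudProvider.
        lc := lifecycle{
            nodeExists: func(name string) (bool, error) { return false, nil },
        }
        exists, _ := lc.nodeExists("node0")
        fmt.Println(exists) // false, answered by the stub, not a cloud API
    }
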
@@ -662,11 +653,6 @@ func (nc *Controller) monitorNodeStatus() error {
 					glog.V(2).Infof("Node %s is ready again, cancelled pod eviction", node.Name)
 				}
 			}
-			// Always remove the shutdown taint here, whether or not taint-based eviction is in use.
-			err := nc.markNodeAsNotShutdown(node)
-			if err != nil {
-				glog.Errorf("Failed to remove taints from node %v. Will retry in next iteration.", node.Name)
-			}
 		}
 		// Report node event.
@@ -680,21 +666,7 @@ func (nc *Controller) monitorNodeStatus() error {
 			// Check with the cloud provider to see if the node still exists. If it
 			// doesn't, delete the node immediately.
 			if currentReadyCondition.Status != v1.ConditionTrue && nc.cloud != nil {
-				// Check whether the node was shut down; if so, taint it instead of deleting it.
-				exists, err := nc.nodeShutdownInCloudProvider(types.NodeName(node.Name))
-				if err != nil && err != cloudprovider.NotImplemented {
-					glog.Errorf("Error determining if node %v shutdown in cloud: %v", node.Name, err)
-					continue
-				}
-				// The node was shut down.
-				if exists {
-					err = controller.AddOrUpdateTaintOnNode(nc.kubeClient, node.Name, shutDownTaint)
-					if err != nil {
-						glog.Errorf("Error patching node taints: %v", err)
-					}
-					continue
-				}
-				exists, err = nc.nodeExistsInCloudProvider(types.NodeName(node.Name))
+				exists, err := nc.nodeExistsInCloudProvider(types.NodeName(node.Name))
 				if err != nil {
 					glog.Errorf("Error determining if node %v exists in cloud: %v", node.Name, err)
 					continue
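
Condensed to its decision logic, the removed branch turned "delete the node if the cloud says it is gone" into a three-way choice in which shutdown takes priority over nonexistence. The sketch below is my paraphrase of that reverted flow, not the committed source:

    package main

    import "fmt"

    // decide paraphrases the reverted branch: shutdown wins over existence.
    func decide(shutdown, exists bool) string {
        switch {
        case shutdown:
            return "taint node with node.cloudprovider.kubernetes.io/shutdown:NoSchedule"
        case !exists:
            return "delete node object"
        default:
            return "leave node as-is"
        }
    }

    func main() {
        fmt.Println(decide(true, true))   // shut down: taint, do not delete
        fmt.Println(decide(false, false)) // gone: delete (post-revert, the only cloud check left)
        fmt.Println(decide(false, true))  // still there: no action from this branch
    }
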
@@ -1130,17 +1102,6 @@ func (nc *Controller) markNodeAsReachable(node *v1.Node) (bool, error) {
 	return nc.zoneNoExecuteTainter[utilnode.GetZoneKey(node)].Remove(node.Name), nil
 }
-
-func (nc *Controller) markNodeAsNotShutdown(node *v1.Node) error {
-	nc.evictorLock.Lock()
-	defer nc.evictorLock.Unlock()
-	err := controller.RemoveTaintOffNode(nc.kubeClient, node.Name, node, shutDownTaint)
-	if err != nil {
-		glog.Errorf("Failed to remove taint from node %v: %v", node.Name, err)
-		return err
-	}
-	return nil
-}
 
 // ComputeZoneState returns a slice of NodeReadyConditions for all Nodes in a given zone.
 // The zone is considered:
 // - fullyDisrupted if there're no Ready Nodes,
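
markNodeAsNotShutdown could safely run for every Ready node (see the monitorNodeStatus hunk above) because removing a taint that is not present amounts to a no-op. A self-contained sketch of that idempotence with a hypothetical helper (the controller itself delegates the real work to controller.RemoveTaintOffNode):

    package main

    import (
        "fmt"

        v1 "k8s.io/api/core/v1"
    )

    // withoutTaint returns taints minus any entry matching key; calling it
    // when the key is absent simply returns an equivalent slice (idempotent).
    func withoutTaint(taints []v1.Taint, key string) []v1.Taint {
        out := make([]v1.Taint, 0, len(taints))
        for _, t := range taints {
            if t.Key != key {
                out = append(out, t)
            }
        }
        return out
    }

    func main() {
        key := "node.cloudprovider.kubernetes.io/shutdown"
        taints := []v1.Taint{{Key: key, Effect: v1.TaintEffectNoSchedule}}
        fmt.Println(len(withoutTaint(taints, key))) // 0: taint removed
        fmt.Println(len(withoutTaint(nil, key)))    // 0: no-op when absent
    }
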


@@ -1360,118 +1360,6 @@ func TestMonitorNodeStatusEvictPodsWithDisruption(t *testing.T) {
 	}
 }
-
-func TestCloudProviderNodeShutdown(t *testing.T) {
-	testCases := []struct {
-		testName string
-		node     *v1.Node
-		shutdown bool
-	}{
-		{
-			testName: "node shutdown: add taint",
-			shutdown: true,
-			node: &v1.Node{
-				ObjectMeta: metav1.ObjectMeta{
-					Name:              "node0",
-					CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
-				},
-				Spec: v1.NodeSpec{
-					ProviderID: "node0",
-				},
-				Status: v1.NodeStatus{
-					Conditions: []v1.NodeCondition{
-						{
-							Type:               v1.NodeReady,
-							Status:             v1.ConditionUnknown,
-							LastHeartbeatTime:  metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
-							LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
-						},
-					},
-				},
-			},
-		},
-		{
-			testName: "node started again after shutdown: remove taint",
-			shutdown: false,
-			node: &v1.Node{
-				ObjectMeta: metav1.ObjectMeta{
-					Name:              "node0",
-					CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
-				},
-				Spec: v1.NodeSpec{
-					ProviderID: "node0",
-					Taints: []v1.Taint{
-						{
-							Key:    algorithm.TaintNodeShutdown,
-							Effect: v1.TaintEffectNoSchedule,
-						},
-					},
-				},
-				Status: v1.NodeStatus{
-					Conditions: []v1.NodeCondition{
-						{
-							Type:               v1.NodeReady,
-							Status:             v1.ConditionTrue,
-							LastHeartbeatTime:  metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
-							LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
-						},
-					},
-				},
-			},
-		},
-	}
-
-	for _, tc := range testCases {
-		t.Run(tc.testName, func(t *testing.T) {
-			fnh := &testutil.FakeNodeHandler{
-				Existing:  []*v1.Node{tc.node},
-				Clientset: fake.NewSimpleClientset(),
-			}
-			nodeController, _ := newNodeLifecycleControllerFromClient(
-				nil,
-				fnh,
-				10*time.Minute,
-				testRateLimiterQPS,
-				testRateLimiterQPS,
-				testLargeClusterThreshold,
-				testUnhealthyThreshold,
-				testNodeMonitorGracePeriod,
-				testNodeStartupGracePeriod,
-				testNodeMonitorPeriod,
-				false)
-			nodeController.cloud = &fakecloud.FakeCloud{}
-			nodeController.now = func() metav1.Time { return metav1.Date(2016, 1, 1, 12, 0, 0, 0, time.UTC) }
-			nodeController.recorder = testutil.NewFakeRecorder()
-			nodeController.nodeShutdownInCloudProvider = func(nodeName types.NodeName) (bool, error) {
-				return tc.shutdown, nil
-			}
-
-			if err := nodeController.syncNodeStore(fnh); err != nil {
-				t.Errorf("unexpected error: %v", err)
-			}
-			if err := nodeController.monitorNodeStatus(); err != nil {
-				t.Errorf("unexpected error: %v", err)
-			}
-
-			if len(fnh.UpdatedNodes) != 1 {
-				t.Errorf("Node was not updated")
-			}
-			if tc.shutdown {
-				if len(fnh.UpdatedNodes[0].Spec.Taints) != 1 {
-					t.Errorf("Node Taint was not added")
-				}
-				if fnh.UpdatedNodes[0].Spec.Taints[0].Key != "node.cloudprovider.kubernetes.io/shutdown" {
-					t.Errorf("Node Taint key is not correct")
-				}
-			} else {
-				if len(fnh.UpdatedNodes[0].Spec.Taints) != 0 {
-					t.Errorf("Node Taint was not removed after node is back in ready state")
-				}
-			}
-		})
-	}
-}
 
 // TestCloudProviderNoRateLimit tests that monitorNodes() immediately deletes
 // pods and the node when kubelet has not reported, and the cloudprovider says
 // the node is gone.
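
The deleted test is table-driven: each case bundles a node fixture with the stubbed cloud answer, and t.Run executes each case as a named subtest. For readers unfamiliar with the shape, here is a minimal, hypothetical example of the same structure (detached from the controller; save it in a _test.go file to run):

    package main

    import "testing"

    // TestDecide mirrors the table-driven shape of the deleted test:
    // one slice of cases, one t.Run subtest per case (hypothetical example).
    func TestDecide(t *testing.T) {
        cases := []struct {
            name     string
            shutdown bool
            want     string
        }{
            {name: "shutdown adds taint", shutdown: true, want: "taint"},
            {name: "running node untouched", shutdown: false, want: "none"},
        }
        for _, tc := range cases {
            t.Run(tc.name, func(t *testing.T) {
                got := "none"
                if tc.shutdown {
                    got = "taint"
                }
                if got != tc.want {
                    t.Errorf("got %q, want %q", got, tc.want)
                }
            })
        }
    }
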
@@ -1516,9 +1404,6 @@ func TestCloudProviderNoRateLimit(t *testing.T) {
 	nodeController.nodeExistsInCloudProvider = func(nodeName types.NodeName) (bool, error) {
 		return false, nil
 	}
-	nodeController.nodeShutdownInCloudProvider = func(nodeName types.NodeName) (bool, error) {
-		return false, nil
-	}
 	// monitorNodeStatus should allow this node to be immediately deleted
 	if err := nodeController.syncNodeStore(fnh); err != nil {
 		t.Errorf("unexpected error: %v", err)
@@ -2357,9 +2242,6 @@ func TestNodeEventGeneration(t *testing.T) {
 	nodeController.nodeExistsInCloudProvider = func(nodeName types.NodeName) (bool, error) {
 		return false, nil
 	}
-	nodeController.nodeShutdownInCloudProvider = func(nodeName types.NodeName) (bool, error) {
-		return false, nil
-	}
 	nodeController.now = func() metav1.Time { return fakeNow }
 	fakeRecorder := testutil.NewFakeRecorder()
 	nodeController.recorder = fakeRecorder