Add support for removing unsupported huge page sizes

When kubelet is restarted, it will now remove the resources for huge
page sizes that are no longer supported. This is required when:
- The node disables huge pages.
- The default huge page size is changed on older versions of Linux
(because the node will then only support the newly set default).
- Software updates change which sizes are supported (e.g. by changing
boot parameters).
This commit is contained in:
Odin Ugedal
2019-07-31 20:13:49 +02:00
parent 21d4d13d98
commit 2830827442
3 changed files with 344 additions and 0 deletions

View File

@@ -31,6 +31,7 @@ import (
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/sets"
cloudprovider "k8s.io/cloud-provider"
cloudproviderapi "k8s.io/cloud-provider/api"
"k8s.io/klog"
@@ -117,6 +118,7 @@ func (kl *Kubelet) tryRegisterWithAPIServer(node *v1.Node) bool {
requiresUpdate := kl.reconcileCMADAnnotationWithExistingNode(node, existingNode)
requiresUpdate = kl.updateDefaultLabels(node, existingNode) || requiresUpdate
requiresUpdate = kl.reconcileExtendedResource(node, existingNode) || requiresUpdate
requiresUpdate = kl.reconcileHugePageResource(node, existingNode) || requiresUpdate
if requiresUpdate {
if _, _, err := nodeutil.PatchNodeStatus(kl.kubeClient.CoreV1(), types.NodeName(kl.nodeName), originalNode, existingNode); err != nil {
klog.Errorf("Unable to reconcile node %q with API server: error updating node: %v", kl.nodeName, err)
@@ -127,6 +129,53 @@ func (kl *Kubelet) tryRegisterWithAPIServer(node *v1.Node) bool {
return true
}
// reconcileHugePageResource will update huge page capacity for each page size and remove huge page sizes no longer supported.
//
// The set of supported huge page resources is taken from
// initialNode.Status.Capacity. For every such resource the capacity and
// allocatable values on existingNode are added or overwritten when they are
// missing or differ from initialNode. Any huge page resource present in
// existingNode.Status.Capacity but absent from initialNode.Status.Capacity is
// deleted from both the capacity and allocatable lists of existingNode.
//
// existingNode is mutated in place. The return value is true when at least one
// entry was added, changed or removed, i.e. when the caller needs to push the
// node to the API server.
func (kl *Kubelet) reconcileHugePageResource(initialNode, existingNode *v1.Node) bool {
	requiresUpdate := false
	supportedHugePageResources := sets.String{}

	for resourceName := range initialNode.Status.Capacity {
		if !v1helper.IsHugePageResourceName(resourceName) {
			continue
		}
		supportedHugePageResources.Insert(string(resourceName))

		initialCapacity := initialNode.Status.Capacity[resourceName]
		initialAllocatable := initialNode.Status.Allocatable[resourceName]

		// resourceIsSupported reflects presence in the existing capacity list
		// only; it is reused below for the allocatable decision as well.
		capacity, resourceIsSupported := existingNode.Status.Capacity[resourceName]
		allocatable := existingNode.Status.Allocatable[resourceName]

		// Add or update capacity if the size was previously unsupported or has changed
		if !resourceIsSupported || capacity.Cmp(initialCapacity) != 0 {
			// Writing to a nil map panics, so create the list on first use.
			if existingNode.Status.Capacity == nil {
				existingNode.Status.Capacity = v1.ResourceList{}
			}
			existingNode.Status.Capacity[resourceName] = initialCapacity.DeepCopy()
			requiresUpdate = true
		}

		// Add or update allocatable if the size was previously unsupported or has changed
		if !resourceIsSupported || allocatable.Cmp(initialAllocatable) != 0 {
			// Writing to a nil map panics, so create the list on first use.
			if existingNode.Status.Allocatable == nil {
				existingNode.Status.Allocatable = v1.ResourceList{}
			}
			existingNode.Status.Allocatable[resourceName] = initialAllocatable.DeepCopy()
			requiresUpdate = true
		}
	}

	// Deleting entries while ranging over the same map is permitted in Go.
	for resourceName := range existingNode.Status.Capacity {
		if !v1helper.IsHugePageResourceName(resourceName) {
			continue
		}

		// If huge page size no longer is supported, we remove it from the node
		if !supportedHugePageResources.Has(string(resourceName)) {
			delete(existingNode.Status.Capacity, resourceName)
			delete(existingNode.Status.Allocatable, resourceName)
			klog.Infof("Removing now unsupported huge page resource named: %s", resourceName)
			requiresUpdate = true
		}
	}
	return requiresUpdate
}
// Zeros out extended resource capacity during reconciliation.
func (kl *Kubelet) reconcileExtendedResource(initialNode, node *v1.Node) bool {
requiresUpdate := false