Merge pull request #101946 from chendave/balance_allocation

Support extended resource in NodeResourcesBalancedAllocation plugin
This commit is contained in:
Kubernetes Prow Robot
2021-07-06 10:42:19 -07:00
committed by GitHub
27 changed files with 814 additions and 74 deletions

View File

@@ -23,6 +23,8 @@ import (
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/kubernetes/pkg/scheduler/apis/config"
"k8s.io/kubernetes/pkg/scheduler/apis/config/validation"
"k8s.io/kubernetes/pkg/scheduler/framework"
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature"
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/names"
@@ -53,11 +55,10 @@ func (ba *BalancedAllocation) Score(ctx context.Context, state *framework.CycleS
}
// ba.score favors nodes with balanced resource usage rate.
// It calculates the difference between the cpu and memory fraction of capacity,
// and prioritizes the host based on how close the two metrics are to each other.
// Detail: score = (1 - variance(cpuFraction,memoryFraction,volumeFraction)) * MaxNodeScore. The algorithm is partly inspired by:
// "Wei Huang et al. An Energy Efficient Virtual Machine Placement Algorithm with Balanced
// Resource Utilization"
// It calculates the standard deviation for those resources and prioritizes the node based on how close the usage of those resources is to each other.
// Detail: score = (1 - std) * MaxNodeScore, where std is the square root of Σ((fraction(i)-mean)^2)/len(resources)
// The algorithm is partly inspired by:
// "Wei Huang et al. An Energy Efficient Virtual Machine Placement Algorithm with Balanced Resource Utilization"
return ba.score(pod, nodeInfo)
}
@@ -67,42 +68,63 @@ func (ba *BalancedAllocation) ScoreExtensions() framework.ScoreExtensions {
}
// NewBalancedAllocation initializes a new plugin and returns it.
func NewBalancedAllocation(_ runtime.Object, h framework.Handle, fts feature.Features) (framework.Plugin, error) {
func NewBalancedAllocation(baArgs runtime.Object, h framework.Handle, fts feature.Features) (framework.Plugin, error) {
args, ok := baArgs.(*config.NodeResourcesBalancedAllocationArgs)
if !ok {
return nil, fmt.Errorf("want args to be of type NodeResourcesBalancedAllocationArgs, got %T", baArgs)
}
if err := validation.ValidateNodeResourcesBalancedAllocationArgs(nil, args); err != nil {
return nil, err
}
resToWeightMap := make(resourceToWeightMap)
for _, resource := range args.Resources {
resToWeightMap[v1.ResourceName(resource.Name)] = resource.Weight
}
return &BalancedAllocation{
handle: h,
resourceAllocationScorer: resourceAllocationScorer{
Name: BalancedAllocationName,
scorer: balancedResourceScorer,
resourceToWeightMap: defaultRequestedRatioResources,
resourceToWeightMap: resToWeightMap,
enablePodOverhead: fts.EnablePodOverhead,
},
}, nil
}
// todo: use resource weights in the scorer function
func balancedResourceScorer(requested, allocable resourceToValueMap) int64 {
cpuFraction := fractionOfCapacity(requested[v1.ResourceCPU], allocable[v1.ResourceCPU])
memoryFraction := fractionOfCapacity(requested[v1.ResourceMemory], allocable[v1.ResourceMemory])
// fractions might be greater than 1 because pods with no requests get minimum
// values.
if cpuFraction > 1 {
cpuFraction = 1
}
if memoryFraction > 1 {
memoryFraction = 1
var resourceToFractions []float64
var totalFraction float64
for name, value := range requested {
fraction := float64(value) / float64(allocable[name])
if fraction > 1 {
fraction = 1
}
totalFraction += fraction
resourceToFractions = append(resourceToFractions, fraction)
}
// Upper and lower boundary of difference between cpuFraction and memoryFraction are -1 and 1
// respectively. Multiplying the absolute value of the difference by `MaxNodeScore` scales the value to
// 0-MaxNodeScore with 0 representing well balanced allocation and `MaxNodeScore` poorly balanced. Subtracting it from
// `MaxNodeScore` leads to the score which also scales from 0 to `MaxNodeScore` while `MaxNodeScore` representing well balanced.
diff := math.Abs(cpuFraction - memoryFraction)
return int64((1 - diff) * float64(framework.MaxNodeScore))
}
std := 0.0
func fractionOfCapacity(requested, capacity int64) float64 {
if capacity == 0 {
return 1
// In most cases the resources are limited to cpu and memory, so the std can be simplified to std := |fraction1-fraction2|/2
// len(fractions) > 2: calculate std based on the well-known formula - square root of Σ((fraction(i)-mean)^2)/len(fractions)
// Otherwise (fewer than two fractions), leaving the std at zero is enough.
if len(resourceToFractions) == 2 {
std = math.Abs((resourceToFractions[0] - resourceToFractions[1]) / 2)
} else if len(resourceToFractions) > 2 {
mean := totalFraction / float64(len(resourceToFractions))
var sum float64
for _, fraction := range resourceToFractions {
sum = sum + (fraction-mean)*(fraction-mean)
}
std = math.Sqrt(sum / float64(len(resourceToFractions)))
}
return float64(requested) / float64(capacity)
// STD (standard deviation) is always a non-negative value. 1-std makes the score higher for the node with the smallest deviation, and
// multiplying it by `MaxNodeScore` provides the scaling factor needed.
return int64((1 - std) * float64(framework.MaxNodeScore))
}

View File

@@ -24,6 +24,7 @@ import (
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/kubernetes/pkg/scheduler/apis/config"
"k8s.io/kubernetes/pkg/scheduler/framework"
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature"
"k8s.io/kubernetes/pkg/scheduler/framework/runtime"
@@ -31,6 +32,28 @@ import (
)
func TestNodeResourcesBalancedAllocation(t *testing.T) {
cpuAndMemoryAndGPU := v1.PodSpec{
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("1000m"),
v1.ResourceMemory: resource.MustParse("2000"),
},
},
},
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("2000m"),
v1.ResourceMemory: resource.MustParse("3000"),
"nvidia.com/gpu": resource.MustParse("3"),
},
},
},
},
NodeName: "machine1",
}
labels1 := map[string]string{
"foo": "bar",
"baz": "blah",
@@ -99,91 +122,113 @@ func TestNodeResourcesBalancedAllocation(t *testing.T) {
NodeName: "machine1",
Containers: []v1.Container{{}},
}
defaultResourceBalancedAllocationSet := []config.ResourceSpec{
{Name: string(v1.ResourceCPU), Weight: 1},
{Name: string(v1.ResourceMemory), Weight: 1},
}
scalarResource := map[string]int64{
"nvidia.com/gpu": 8,
}
tests := []struct {
pod *v1.Pod
pods []*v1.Pod
nodes []*v1.Node
expectedList framework.NodeScoreList
name string
args config.NodeResourcesBalancedAllocationArgs
}{
{
// Node1 scores (remaining resources) on 0-MaxNodeScore scale
// CPU Fraction: 0 / 4000 = 0%
// Memory Fraction: 0 / 10000 = 0%
// Node1 Score: MaxNodeScore - (0-0)*MaxNodeScore = MaxNodeScore
// Node1 Score: (1-0) * MaxNodeScore = MaxNodeScore
// Node2 scores (remaining resources) on 0-MaxNodeScore scale
// CPU Fraction: 0 / 4000 = 0 %
// Memory Fraction: 0 / 10000 = 0%
// Node2 Score: MaxNodeScore - (0-0)*MaxNodeScore = MaxNodeScore
// Node2 Score: (1-0) * MaxNodeScore = MaxNodeScore
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
expectedList: []framework.NodeScore{{Name: "machine1", Score: framework.MaxNodeScore}, {Name: "machine2", Score: framework.MaxNodeScore}},
name: "nothing scheduled, nothing requested",
args: config.NodeResourcesBalancedAllocationArgs{Resources: defaultResourceBalancedAllocationSet},
},
{
// Node1 scores on 0-MaxNodeScore scale
// CPU Fraction: 3000 / 4000= 75%
// Memory Fraction: 5000 / 10000 = 50%
// Node1 Score: MaxNodeScore - (0.75-0.5)*MaxNodeScore = 75
// Node1 std: (0.75 - 0.5) / 2 = 0.125
// Node1 Score: (1 - 0.125)*MaxNodeScore = 87
// Node2 scores on 0-MaxNodeScore scale
// CPU Fraction: 3000 / 6000= 50%
// Memory Fraction: 5000/10000 = 50%
// Node2 Score: MaxNodeScore - (0.5-0.5)*MaxNodeScore = MaxNodeScore
// Node2 std: 0
// Node2 Score: (1-0) * MaxNodeScore = MaxNodeScore
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 6000, 10000)},
expectedList: []framework.NodeScore{{Name: "machine1", Score: 75}, {Name: "machine2", Score: framework.MaxNodeScore}},
expectedList: []framework.NodeScore{{Name: "machine1", Score: 87}, {Name: "machine2", Score: framework.MaxNodeScore}},
name: "nothing scheduled, resources requested, differently sized machines",
args: config.NodeResourcesBalancedAllocationArgs{Resources: defaultResourceBalancedAllocationSet},
},
{
// Node1 scores on 0-MaxNodeScore scale
// CPU Fraction: 0 / 4000= 0%
// Memory Fraction: 0 / 10000 = 0%
// Node1 Score: MaxNodeScore - (0-0)*MaxNodeScore = MaxNodeScore
// Node1 std: 0
// Node1 Score: (1-0) * MaxNodeScore = MaxNodeScore
// Node2 scores on 0-MaxNodeScore scale
// CPU Fraction: 0 / 4000= 0%
// Memory Fraction: 0 / 10000 = 0%
// Node2 Score: MaxNodeScore - (0-0)*MaxNodeScore= MaxNodeScore
// Node2 std: 0
// Node2 Score: (1-0) * MaxNodeScore = MaxNodeScore
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
expectedList: []framework.NodeScore{{Name: "machine1", Score: framework.MaxNodeScore}, {Name: "machine2", Score: framework.MaxNodeScore}},
name: "no resources requested, pods scheduled",
name: "no resources requested, pods without container scheduled",
pods: []*v1.Pod{
{Spec: machine1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
{Spec: machine1Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: machine2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: machine2Spec, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
},
args: config.NodeResourcesBalancedAllocationArgs{Resources: defaultResourceBalancedAllocationSet},
},
{
// Node1 scores on 0-MaxNodeScore scale
// CPU Fraction: 300 / 250 = 120%, capped at 100%
// Memory Fraction: 600 / 10000 = 60%
// Node1 Score: MaxNodeScore - (100-60)*MaxNodeScore = 60
// Node1 std: (1 - 0.6) / 2 = 0.2
// Node1 Score: (1 - 0.2)*MaxNodeScore = 80
// Node2 scores on 0-MaxNodeScore scale
// CPU Fraction: 100 / 250 = 40%
// Memory Fraction: 200 / 10000 = 20%
// Node2 Score: MaxNodeScore - (40-20)*MaxNodeScore= 80
// Node2 std: (0.4 - 0.2) / 2 = 0.1
// Node2 Score: (1 - 0.1)*MaxNodeScore = 90
pod: &v1.Pod{Spec: nonZeroContainer},
nodes: []*v1.Node{makeNode("machine1", 250, 1000*1024*1024), makeNode("machine2", 250, 1000*1024*1024)},
expectedList: []framework.NodeScore{{Name: "machine1", Score: 60}, {Name: "machine2", Score: 80}},
name: "no resources requested, pods scheduled",
expectedList: []framework.NodeScore{{Name: "machine1", Score: 80}, {Name: "machine2", Score: 90}},
name: "no resources requested, pods with container scheduled",
pods: []*v1.Pod{
{Spec: nonZeroContainer1},
{Spec: nonZeroContainer1},
},
args: config.NodeResourcesBalancedAllocationArgs{Resources: defaultResourceBalancedAllocationSet},
},
{
// Node1 scores on 0-MaxNodeScore scale
// CPU Fraction: 6000 / 10000 = 60%
// Memory Fraction: 0 / 20000 = 0%
// Node1 Score: MaxNodeScore - (0.6-0)*MaxNodeScore = 40
// Node1 std: (0.6 - 0) / 2 = 0.3
// Node1 Score: (1 - 0.3)*MaxNodeScore = 70
// Node2 scores on 0-MaxNodeScore scale
// CPU Fraction: 6000 / 10000 = 60%
// Memory Fraction: 5000 / 20000 = 25%
// Node2 Score: MaxNodeScore - (0.6-0.25)*MaxNodeScore = 65
// Node2 std: (0.6 - 0.25) / 2 = 0.175
// Node2 Score: (1 - 0.175)*MaxNodeScore = 82
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
expectedList: []framework.NodeScore{{Name: "machine1", Score: 40}, {Name: "machine2", Score: 65}},
expectedList: []framework.NodeScore{{Name: "machine1", Score: 70}, {Name: "machine2", Score: 82}},
name: "no resources requested, pods scheduled with resources",
pods: []*v1.Pod{
{Spec: cpuOnly, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
@@ -191,60 +236,139 @@ func TestNodeResourcesBalancedAllocation(t *testing.T) {
{Spec: cpuOnly2, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
{Spec: cpuAndMemory, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
},
args: config.NodeResourcesBalancedAllocationArgs{Resources: defaultResourceBalancedAllocationSet},
},
{
// Node1 scores on 0-MaxNodeScore scale
// CPU Fraction: 6000 / 10000 = 60%
// Memory Fraction: 5000 / 20000 = 25%
// Node1 Score: MaxNodeScore - (0.6-0.25)*MaxNodeScore = 65
// Node1 std: (0.6 - 0.25) / 2 = 0.175
// Node1 Score: (1 - 0.175)*MaxNodeScore = 82
// Node2 scores on 0-MaxNodeScore scale
// CPU Fraction: 6000 / 10000 = 60%
// Memory Fraction: 10000 / 20000 = 50%
// Node2 Score: MaxNodeScore - (0.6-0.5)*MaxNodeScore = 90
// Node2 std: (0.6 - 0.5) / 2 = 0.05
// Node2 Score: (1 - 0.05)*MaxNodeScore = 95
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
expectedList: []framework.NodeScore{{Name: "machine1", Score: 65}, {Name: "machine2", Score: 90}},
expectedList: []framework.NodeScore{{Name: "machine1", Score: 82}, {Name: "machine2", Score: 95}},
name: "resources requested, pods scheduled with resources",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
},
args: config.NodeResourcesBalancedAllocationArgs{Resources: defaultResourceBalancedAllocationSet},
},
{
// Node1 scores on 0-MaxNodeScore scale
// CPU Fraction: 6000 / 10000 = 60%
// Memory Fraction: 5000 / 20000 = 25%
// Node1 Score: MaxNodeScore - (0.6-0.25)*MaxNodeScore = 65
// Node1 std: (0.6 - 0.25) / 2 = 0.175
// Node1 Score: (1 - 0.175)*MaxNodeScore = 82
// Node2 scores on 0-MaxNodeScore scale
// CPU Fraction: 6000 / 10000 = 60%
// Memory Fraction: 10000 / 50000 = 20%
// Node2 Score: MaxNodeScore - (0.6-0.2)*MaxNodeScore = 60
// Node2 std: (0.6 - 0.2) / 2 = 0.2
// Node2 Score: (1 - 0.2)*MaxNodeScore = 80
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 50000)},
expectedList: []framework.NodeScore{{Name: "machine1", Score: 65}, {Name: "machine2", Score: 60}},
expectedList: []framework.NodeScore{{Name: "machine1", Score: 82}, {Name: "machine2", Score: 80}},
name: "resources requested, pods scheduled with resources, differently sized machines",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
},
args: config.NodeResourcesBalancedAllocationArgs{Resources: defaultResourceBalancedAllocationSet},
},
{
// Node1 scores on 0-MaxNodeScore scale
// CPU Fraction: 6000 / 6000 = 1
// Memory Fraction: 0 / 10000 = 0
// Node1 std: (1 - 0) / 2 = 0.5
// Node1 Score: (1 - 0.5)*MaxNodeScore = 50
// Node1 Score: MaxNodeScore - (1 - 0) * MaxNodeScore = 0
// Node2 scores on 0-MaxNodeScore scale
// CPU Fraction: 6000 / 6000 = 1
// Memory Fraction 5000 / 10000 = 50%
// Node2 Score: MaxNodeScore - (1 - 0.5) * MaxNodeScore = 50
// Node2 std: (1 - 0.5) / 2 = 0.25
// Node2 Score: (1 - 0.25)*MaxNodeScore = 75
pod: &v1.Pod{Spec: cpuOnly},
nodes: []*v1.Node{makeNode("machine1", 6000, 10000), makeNode("machine2", 6000, 10000)},
expectedList: []framework.NodeScore{{Name: "machine1", Score: 0}, {Name: "machine2", Score: 50}},
expectedList: []framework.NodeScore{{Name: "machine1", Score: 50}, {Name: "machine2", Score: 75}},
name: "requested resources at node capacity",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
},
args: config.NodeResourcesBalancedAllocationArgs{Resources: defaultResourceBalancedAllocationSet},
},
{
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 0, 0), makeNode("machine2", 0, 0)},
expectedList: []framework.NodeScore{{Name: "machine1", Score: 100}, {Name: "machine2", Score: 100}},
name: "zero node resources, pods scheduled with resources",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
},
args: config.NodeResourcesBalancedAllocationArgs{Resources: defaultResourceBalancedAllocationSet},
},
// Node1 scores on 0-MaxNodeScore scale
// CPU Fraction: 3100 / 3500 = 88.57%
// Memory Fraction: 5000 / 40000 = 12.5%
// GPU Fraction: 4 / 8 = 50%
// Node1 std: sqrt(((0.8857 - 0.503) * (0.8857 - 0.503) + (0.503 - 0.125) * (0.503 - 0.125) + (0.503 - 0.5) * (0.503 - 0.5)) / 3) = 0.3105
// Node1 Score: (1 - 0.3105)*MaxNodeScore = 68
// Node2 scores on 0-MaxNodeScore scale
// CPU Fraction: 3100 / 3500 = 88.57%
// Memory Fraction: 5000 / 40000 = 12.5%
// GPU Fraction: 1 / 8 = 12.5%
// Node2 std: sqrt(((0.8857 - 0.378) * (0.8857 - 0.378) + (0.378 - 0.125) * (0.378 - 0.125) + (0.378 - 0.125) * (0.378 - 0.125)) / 3) = 0.358
// Node2 Score: (1 - 0.358)*MaxNodeScore = 64
{
pod: &v1.Pod{
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceMemory: resource.MustParse("0"),
"nvidia.com/gpu": resource.MustParse("1"),
},
},
},
},
},
},
nodes: []*v1.Node{makeNodeWithExtendedResource("machine1", 3500, 40000, scalarResource), makeNodeWithExtendedResource("machine2", 3500, 40000, scalarResource)},
expectedList: []framework.NodeScore{{Name: "machine1", Score: 68}, {Name: "machine2", Score: 64}},
name: "include scalar resource on a node for balanced resource allocation",
pods: []*v1.Pod{
{Spec: cpuAndMemory},
{Spec: cpuAndMemoryAndGPU},
},
args: config.NodeResourcesBalancedAllocationArgs{Resources: []config.ResourceSpec{
{Name: string(v1.ResourceCPU), Weight: 1},
{Name: string(v1.ResourceMemory), Weight: 1},
{Name: "nvidia.com/gpu", Weight: 1},
}},
},
// Only one node (machine1) has the scalar resource, pod doesn't request the scalar resource and the scalar resource should be skipped for consideration.
// Node1: std = 0, score = 100
// Node2: std = 0, score = 100
{
pod: &v1.Pod{Spec: v1.PodSpec{Containers: []v1.Container{{}}}},
nodes: []*v1.Node{makeNodeWithExtendedResource("machine1", 3500, 40000, scalarResource), makeNode("machine2", 3500, 40000)},
expectedList: []framework.NodeScore{{Name: "machine1", Score: 100}, {Name: "machine2", Score: 100}},
name: "node without the scalar resource results to a higher score",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuOnly2},
},
args: config.NodeResourcesBalancedAllocationArgs{Resources: []config.ResourceSpec{
{Name: string(v1.ResourceCPU), Weight: 1},
{Name: "nvidia.com/gpu", Weight: 1},
}},
},
}
@@ -252,8 +376,7 @@ func TestNodeResourcesBalancedAllocation(t *testing.T) {
t.Run(test.name, func(t *testing.T) {
snapshot := cache.NewSnapshot(test.pods, test.nodes)
fh, _ := runtime.NewFramework(nil, nil, runtime.WithSnapshotSharedLister(snapshot))
p, _ := NewBalancedAllocation(nil, fh, feature.Features{EnablePodOverhead: true})
p, _ := NewBalancedAllocation(&test.args, fh, feature.Features{EnablePodOverhead: true})
for i := range test.nodes {
hostResult, err := p.(framework.ScorePlugin).Score(context.Background(), nil, test.pod, test.nodes[i].Name)
if err != nil {

View File

@@ -30,9 +30,6 @@ type resourceToWeightMap map[v1.ResourceName]int64
// scorer is decorator for resourceAllocationScorer
type scorer func(args *config.NodeResourcesFitArgs) *resourceAllocationScorer
// defaultRequestedRatioResources is used to set default requestToWeight map for CPU and memory
var defaultRequestedRatioResources = resourceToWeightMap{v1.ResourceMemory: 1, v1.ResourceCPU: 1}
// resourceAllocationScorer contains information to calculate resource allocation score.
type resourceAllocationScorer struct {
Name string
@@ -42,7 +39,7 @@ type resourceAllocationScorer struct {
enablePodOverhead bool
}
// resourceToValueMap contains resource name and score.
// resourceToValueMap is keyed with resource name and valued with quantity.
type resourceToValueMap map[v1.ResourceName]int64
// score will use `scorer` function to calculate the score.