scheduler: change fit predicates and priority func as needed for optimization

Hongchao Deng
2016-01-28 12:14:45 -08:00
parent 9942f6bf3e
commit 9236e4a0b4
17 changed files with 343 additions and 255 deletions
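The change running through every file below is a single signature swap: priority functions no longer receive a per-call machinesToPods map plus a PodLister, but one shared nodeName -> schedulercache.NodeInfo map that the scheduler builds once per scheduling cycle. A minimal sketch of the before/after contract, inferred from the hunks that follow; the PriorityFunction type itself is defined outside this diff, so the exact declaration and the type names here are assumptions:

// Sketch only: illustrative names; the real PriorityFunction definition lives in
// the scheduler's algorithm package and is not part of the hunks shown here.
package sketch

import (
	"k8s.io/kubernetes/pkg/api"
	"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
	schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)

// Before: each call got a precomputed machine->pods map and a PodLister to re-query.
type oldPriorityFunction func(pod *api.Pod, machinesToPods map[string][]*api.Pod,
	podLister algorithm.PodLister, nodeLister algorithm.NodeLister) (schedulerapi.HostPriorityList, error)

// After: each call gets the shared nodeName->NodeInfo map; functions that still
// need a PodLister (SelectorSpread, ServiceAntiAffinity) hold one injected via
// their constructors instead.
type newPriorityFunction func(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo,
	nodeLister algorithm.NodeLister) (schedulerapi.HostPriorityList, error)

Callers consequently read a node's pods as nodeNameToInfo[node.Name].Pods() rather than machinesToPods[node.Name].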

View File

@@ -22,6 +22,7 @@ import (
"k8s.io/kubernetes/pkg/labels"
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
type NodeAffinity struct {
@@ -40,7 +41,7 @@ func NewNodeAffinityPriority(nodeLister algorithm.NodeLister) algorithm.Priority
// it will get an increment of preferredSchedulingTerm.Weight. Thus, the more preferredSchedulingTerms
// the node satisfies, and the greater the weights of the terms it satisfies, the higher the
// score the node gets.
-func (s *NodeAffinity) CalculateNodeAffinityPriority(pod *api.Pod, machinesToPods map[string][]*api.Pod, podLister algorithm.PodLister, nodeLister algorithm.NodeLister) (schedulerapi.HostPriorityList, error) {
+func (s *NodeAffinity) CalculateNodeAffinityPriority(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodeLister algorithm.NodeLister) (schedulerapi.HostPriorityList, error) {
var maxCount int
counts := map[string]int{}
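The hunk cuts off before the scoring loop. For orientation, a rough sketch of how the accumulated term weights typically become a 0-10 host priority; this continues the function body shown above and describes code that is not part of this commit's hunks, so treat it as an assumption:

// Sketch (assumption, body not shown in the hunk): nodes comes from nodeLister.List();
// each node's summed preferredSchedulingTerm weights are normalized against the
// highest sum seen, so the best-matching node scores 10.
list := schedulerapi.HostPriorityList{}
for _, node := range nodes.Items {
	fScore := float64(0)
	if maxCount > 0 {
		fScore = 10 * (float64(counts[node.Name]) / float64(maxCount))
	}
	list = append(list, schedulerapi.HostPriority{Host: node.Name, Score: int(fScore)})
}
return list, nil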

View File

@@ -23,6 +23,7 @@ import (
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
func TestNodeAffinityPriority(t *testing.T) {
@@ -156,7 +157,7 @@ func TestNodeAffinityPriority(t *testing.T) {
for _, test := range tests {
nodeAffinity := NodeAffinity{nodeLister: algorithm.FakeNodeLister(api.NodeList{Items: test.nodes})}
-list, err := nodeAffinity.CalculateNodeAffinityPriority(test.pod, nil, nil, algorithm.FakeNodeLister(api.NodeList{Items: test.nodes}))
+list, err := nodeAffinity.CalculateNodeAffinityPriority(test.pod, schedulercache.CreateNodeNameToInfoMap(nil), algorithm.FakeNodeLister(api.NodeList{Items: test.nodes}))
if err != nil {
t.Errorf("unexpected error: %v", err)
}

View File

@@ -25,6 +25,7 @@ import (
"k8s.io/kubernetes/pkg/labels"
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
// the unused capacity is calculated on a scale of 0-10
@@ -114,7 +115,7 @@ func calculateResourceOccupancy(pod *api.Pod, node api.Node, pods []*api.Pod) sc
// It calculates the percentage of memory and CPU requested by pods scheduled on the node, and prioritizes
// based on the average of the fraction of requested to capacity for CPU and memory.
// Details: (cpu((capacity - sum(requested)) * 10 / capacity) + memory((capacity - sum(requested)) * 10 / capacity)) / 2
-func LeastRequestedPriority(pod *api.Pod, machinesToPods map[string][]*api.Pod, podLister algorithm.PodLister, nodeLister algorithm.NodeLister) (schedulerapi.HostPriorityList, error) {
+func LeastRequestedPriority(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodeLister algorithm.NodeLister) (schedulerapi.HostPriorityList, error) {
nodes, err := nodeLister.List()
if err != nil {
return schedulerapi.HostPriorityList{}, err
@@ -122,7 +123,7 @@ func LeastRequestedPriority(pod *api.Pod, machinesToPods map[string][]*api.Pod,
list := schedulerapi.HostPriorityList{}
for _, node := range nodes.Items {
-list = append(list, calculateResourceOccupancy(pod, node, machinesToPods[node.Name]))
+list = append(list, calculateResourceOccupancy(pod, node, nodeNameToInfo[node.Name].Pods()))
}
return list, nil
}
@@ -143,7 +144,7 @@ func NewNodeLabelPriority(label string, presence bool) algorithm.PriorityFunctio
// CalculateNodeLabelPriority checks whether a particular label exists on a node or not, regardless of its value.
// If presence is true, prioritizes nodes that have the specified label, regardless of value.
// If presence is false, prioritizes nodes that do not have the specified label.
-func (n *NodeLabelPrioritizer) CalculateNodeLabelPriority(pod *api.Pod, machinesToPods map[string][]*api.Pod, podLister algorithm.PodLister, nodeLister algorithm.NodeLister) (schedulerapi.HostPriorityList, error) {
+func (n *NodeLabelPrioritizer) CalculateNodeLabelPriority(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodeLister algorithm.NodeLister) (schedulerapi.HostPriorityList, error) {
var score int
nodes, err := nodeLister.List()
if err != nil {
@@ -182,7 +183,7 @@ const (
// based on the total size of those images.
// - If none of the images are present, this node will be given the lowest priority.
// - If some of the images are present on a node, the larger their sizes' sum, the higher the node's priority.
-func ImageLocalityPriority(pod *api.Pod, machinesToPods map[string][]*api.Pod, podLister algorithm.PodLister, nodeLister algorithm.NodeLister) (schedulerapi.HostPriorityList, error) {
+func ImageLocalityPriority(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodeLister algorithm.NodeLister) (schedulerapi.HostPriorityList, error) {
sumSizeMap := make(map[string]int64)
nodes, err := nodeLister.List()
@@ -248,7 +249,7 @@ func calculateScoreFromSize(sumSize int64) int {
// close the two metrics are to each other.
// Detail: score = 10 - abs(cpuFraction-memoryFraction)*10. The algorithm is partly inspired by:
// "Wei Huang et al. An Energy Efficient Virtual Machine Placement Algorithm with Balanced Resource Utilization"
-func BalancedResourceAllocation(pod *api.Pod, machinesToPods map[string][]*api.Pod, podLister algorithm.PodLister, nodeLister algorithm.NodeLister) (schedulerapi.HostPriorityList, error) {
+func BalancedResourceAllocation(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodeLister algorithm.NodeLister) (schedulerapi.HostPriorityList, error) {
nodes, err := nodeLister.List()
if err != nil {
return schedulerapi.HostPriorityList{}, err
@@ -256,7 +257,7 @@ func BalancedResourceAllocation(pod *api.Pod, machinesToPods map[string][]*api.P
list := schedulerapi.HostPriorityList{}
for _, node := range nodes.Items {
-list = append(list, calculateBalancedResourceAllocation(pod, node, machinesToPods[node.Name]))
+list = append(list, calculateBalancedResourceAllocation(pod, node, nodeNameToInfo[node.Name].Pods()))
}
return list, nil
}
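A quick worked example of the two formulas quoted in the comments above, using made-up numbers (not taken from this commit):

// Worked example for the LeastRequestedPriority and BalancedResourceAllocation
// formulas; the capacities and requests below are invented for illustration.
package main

import (
	"fmt"
	"math"
)

func main() {
	// Node: 4000 millicores CPU and 8192 MiB memory; its pods request 1000m and 2048 MiB.
	capCPU, capMem := 4000.0, 8192.0
	reqCPU, reqMem := 1000.0, 2048.0

	// LeastRequestedPriority: average of the per-resource "unused * 10 / capacity" scores.
	cpuScore := (capCPU - reqCPU) * 10 / capCPU // 7.5
	memScore := (capMem - reqMem) * 10 / capMem // 7.5
	leastRequested := (cpuScore + memScore) / 2 // 7.5

	// BalancedResourceAllocation: 10 - |cpuFraction - memoryFraction| * 10.
	cpuFraction := reqCPU / capCPU                        // 0.25
	memFraction := reqMem / capMem                        // 0.25
	balanced := 10 - math.Abs(cpuFraction-memFraction)*10 // 10: usage is perfectly balanced

	fmt.Println(leastRequested, balanced) // 7.5 10
}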

View File

@@ -26,8 +26,8 @@ import (
"k8s.io/kubernetes/pkg/api/resource"
"k8s.io/kubernetes/plugin/pkg/scheduler"
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
func makeNode(node string, milliCPU, memory int64) api.Node {
@@ -132,18 +132,15 @@ func TestZeroRequest(t *testing.T) {
const expectedPriority int = 25
for _, test := range tests {
-m2p, err := predicates.MapPodsToMachines(algorithm.FakePodLister(test.pods))
-if err != nil {
-t.Errorf("unexpected error: %v", err)
-}
+nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods)
list, err := scheduler.PrioritizeNodes(
test.pod,
-m2p,
+nodeNameToInfo,
algorithm.FakePodLister(test.pods),
// This should match the configuration in defaultPriorities() in
// plugin/pkg/scheduler/algorithmprovider/defaults/defaults.go if you want
// to test what's actually in production.
-[]algorithm.PriorityConfig{{Function: LeastRequestedPriority, Weight: 1}, {Function: BalancedResourceAllocation, Weight: 1}, {Function: NewSelectorSpreadPriority(algorithm.FakeServiceLister([]api.Service{}), algorithm.FakeControllerLister([]api.ReplicationController{})), Weight: 1}},
+[]algorithm.PriorityConfig{{Function: LeastRequestedPriority, Weight: 1}, {Function: BalancedResourceAllocation, Weight: 1}, {Function: NewSelectorSpreadPriority(algorithm.FakePodLister(test.pods), algorithm.FakeServiceLister([]api.Service{}), algorithm.FakeControllerLister([]api.ReplicationController{})), Weight: 1}},
algorithm.FakeNodeLister(api.NodeList{Items: test.nodes}), []algorithm.SchedulerExtender{})
if err != nil {
t.Errorf("unexpected error: %v", err)
@@ -387,11 +384,8 @@ func TestLeastRequested(t *testing.T) {
}
for _, test := range tests {
-m2p, err := predicates.MapPodsToMachines(algorithm.FakePodLister(test.pods))
-if err != nil {
-t.Errorf("unexpected error: %v", err)
-}
-list, err := LeastRequestedPriority(test.pod, m2p, algorithm.FakePodLister(test.pods), algorithm.FakeNodeLister(api.NodeList{Items: test.nodes}))
+nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods)
+list, err := LeastRequestedPriority(test.pod, nodeNameToInfo, algorithm.FakeNodeLister(api.NodeList{Items: test.nodes}))
if err != nil {
t.Errorf("unexpected error: %v", err)
}
@@ -485,7 +479,7 @@ func TestNewNodeLabelPriority(t *testing.T) {
label: test.label,
presence: test.presence,
}
-list, err := prioritizer.CalculateNodeLabelPriority(nil, map[string][]*api.Pod{}, nil, algorithm.FakeNodeLister(api.NodeList{Items: test.nodes}))
+list, err := prioritizer.CalculateNodeLabelPriority(nil, map[string]*schedulercache.NodeInfo{}, algorithm.FakeNodeLister(api.NodeList{Items: test.nodes}))
if err != nil {
t.Errorf("unexpected error: %v", err)
}
@@ -723,11 +717,8 @@ func TestBalancedResourceAllocation(t *testing.T) {
}
for _, test := range tests {
-m2p, err := predicates.MapPodsToMachines(algorithm.FakePodLister(test.pods))
-if err != nil {
-t.Errorf("unexpected error: %v", err)
-}
-list, err := BalancedResourceAllocation(test.pod, m2p, algorithm.FakePodLister(test.pods), algorithm.FakeNodeLister(api.NodeList{Items: test.nodes}))
+nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods)
+list, err := BalancedResourceAllocation(test.pod, nodeNameToInfo, algorithm.FakeNodeLister(api.NodeList{Items: test.nodes}))
if err != nil {
t.Errorf("unexpected error: %v", err)
}
@@ -870,11 +861,8 @@ func TestImageLocalityPriority(t *testing.T) {
}
for _, test := range tests {
-m2p, err := predicates.MapPodsToMachines(algorithm.FakePodLister(test.pods))
-if err != nil {
-t.Errorf("unexpected error: %v", err)
-}
-list, err := ImageLocalityPriority(test.pod, m2p, algorithm.FakePodLister(test.pods), algorithm.FakeNodeLister(api.NodeList{Items: test.nodes}))
+nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods)
+list, err := ImageLocalityPriority(test.pod, nodeNameToInfo, algorithm.FakeNodeLister(api.NodeList{Items: test.nodes}))
if err != nil {
t.Errorf("unexpected error: %v", err)
}
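In each of the loops above, predicates.MapPodsToMachines (which returned an error and forced a four-line error check per test) is replaced by schedulercache.CreateNodeNameToInfoMap, which cannot fail, so the boilerplate disappears. The real implementation is not part of this diff; a plausible sketch of what the helper does, with hypothetical method names:

// Plausible sketch (assumption; the actual code lives in the schedulercache
// package, outside this diff): group pods by spec.NodeName into NodeInfo
// objects so priority functions can look up a node's pods directly.
func CreateNodeNameToInfoMap(pods []*api.Pod) map[string]*NodeInfo {
	nodeNameToInfo := make(map[string]*NodeInfo)
	for _, pod := range pods {
		name := pod.Spec.NodeName
		if _, ok := nodeNameToInfo[name]; !ok {
			nodeNameToInfo[name] = NewNodeInfo() // NewNodeInfo/AddPod are assumed names
		}
		nodeNameToInfo[name].AddPod(pod)
	}
	return nodeNameToInfo
}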

View File

@@ -23,6 +23,7 @@ import (
"k8s.io/kubernetes/pkg/labels"
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
// The maximum priority value to give to a node
@@ -34,12 +35,14 @@ const maxPriority = 10
const zoneWeighting = 2.0 / 3.0
type SelectorSpread struct {
+podLister algorithm.PodLister
serviceLister algorithm.ServiceLister
controllerLister algorithm.ControllerLister
}
-func NewSelectorSpreadPriority(serviceLister algorithm.ServiceLister, controllerLister algorithm.ControllerLister) algorithm.PriorityFunction {
+func NewSelectorSpreadPriority(podLister algorithm.PodLister, serviceLister algorithm.ServiceLister, controllerLister algorithm.ControllerLister) algorithm.PriorityFunction {
selectorSpread := &SelectorSpread{
+podLister: podLister,
serviceLister: serviceLister,
controllerLister: controllerLister,
}
@@ -73,7 +76,7 @@ func getZoneKey(node *api.Node) string {
// i.e. it pushes the scheduler towards a node where there's the smallest number of
// pods which match the same service selectors or RC selectors as the pod being scheduled.
// Where zone information is included on the nodes, it favors nodes in zones with fewer existing matching pods.
-func (s *SelectorSpread) CalculateSpreadPriority(pod *api.Pod, machinesToPods map[string][]*api.Pod, podLister algorithm.PodLister, nodeLister algorithm.NodeLister) (schedulerapi.HostPriorityList, error) {
+func (s *SelectorSpread) CalculateSpreadPriority(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodeLister algorithm.NodeLister) (schedulerapi.HostPriorityList, error) {
var nsPods []*api.Pod
selectors := make([]labels.Selector, 0)
@@ -91,7 +94,7 @@ func (s *SelectorSpread) CalculateSpreadPriority(pod *api.Pod, machinesToPods ma
}
if len(selectors) > 0 {
-pods, err := podLister.List(labels.Everything())
+pods, err := s.podLister.List(labels.Everything())
if err != nil {
return nil, err
}
@@ -198,12 +201,14 @@ func (s *SelectorSpread) CalculateSpreadPriority(pod *api.Pod, machinesToPods ma
}
type ServiceAntiAffinity struct {
+podLister algorithm.PodLister
serviceLister algorithm.ServiceLister
label string
}
-func NewServiceAntiAffinityPriority(serviceLister algorithm.ServiceLister, label string) algorithm.PriorityFunction {
+func NewServiceAntiAffinityPriority(podLister algorithm.PodLister, serviceLister algorithm.ServiceLister, label string) algorithm.PriorityFunction {
antiAffinity := &ServiceAntiAffinity{
+podLister: podLister,
serviceLister: serviceLister,
label: label,
}
@@ -213,7 +218,7 @@ func NewServiceAntiAffinityPriority(serviceLister algorithm.ServiceLister, label
// CalculateAntiAffinityPriority spreads pods by minimizing the number of pods belonging to the same service
// on machines with the same value for a particular label.
// The label to be considered is provided to the struct (ServiceAntiAffinity).
-func (s *ServiceAntiAffinity) CalculateAntiAffinityPriority(pod *api.Pod, machinesToPods map[string][]*api.Pod, podLister algorithm.PodLister, nodeLister algorithm.NodeLister) (schedulerapi.HostPriorityList, error) {
+func (s *ServiceAntiAffinity) CalculateAntiAffinityPriority(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodeLister algorithm.NodeLister) (schedulerapi.HostPriorityList, error) {
var nsServicePods []*api.Pod
services, err := s.serviceLister.GetPodServices(pod)
@@ -221,7 +226,7 @@ func (s *ServiceAntiAffinity) CalculateAntiAffinityPriority(pod *api.Pod, machin
// just use the first service and get the other pods within the service
// TODO: a separate predicate can be created that tries to handle all services for the pod
selector := labels.SelectorFromSet(services[0].Spec.Selector)
-pods, err := podLister.List(selector)
+pods, err := s.podLister.List(selector)
if err != nil {
return nil, err
}
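The maxPriority and zoneWeighting constants in the context above control how zone-level spreading folds into the per-node score. The blending code is outside the hunks shown, so the exact formula below is an assumption based on the constant's comment and value, but the arithmetic illustrates the intent:

// Worked example (made-up scores; blend formula assumed from zoneWeighting = 2/3):
// a lightly loaded node that happens to sit in a heavily loaded zone.
package main

import "fmt"

func main() {
	const zoneWeighting = 2.0 / 3.0
	nodeScore := 9.0 // 0-10 spreading score for the node itself
	zoneScore := 3.0 // 0-10 spreading score for the node's zone

	blended := (1-zoneWeighting)*nodeScore + zoneWeighting*zoneScore
	fmt.Println(blended) // 5: the zone term outweighs the node term 2:1
}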

View File

@@ -24,8 +24,8 @@ import (
"k8s.io/kubernetes/pkg/api"
wellknownlabels "k8s.io/kubernetes/pkg/api/unversioned"
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
func TestSelectorSpreadPriority(t *testing.T) {
@@ -219,12 +219,9 @@ func TestSelectorSpreadPriority(t *testing.T) {
}
for _, test := range tests {
-m2p, err := predicates.MapPodsToMachines(algorithm.FakePodLister(test.pods))
-if err != nil {
-t.Errorf("unexpected error: %v", err)
-}
-selectorSpread := SelectorSpread{serviceLister: algorithm.FakeServiceLister(test.services), controllerLister: algorithm.FakeControllerLister(test.rcs)}
-list, err := selectorSpread.CalculateSpreadPriority(test.pod, m2p, algorithm.FakePodLister(test.pods), algorithm.FakeNodeLister(makeNodeList(test.nodes)))
+nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods)
+selectorSpread := SelectorSpread{podLister: algorithm.FakePodLister(test.pods), serviceLister: algorithm.FakeServiceLister(test.services), controllerLister: algorithm.FakeControllerLister(test.rcs)}
+list, err := selectorSpread.CalculateSpreadPriority(test.pod, nodeNameToInfo, algorithm.FakeNodeLister(makeNodeList(test.nodes)))
if err != nil {
t.Errorf("unexpected error: %v", err)
}
@@ -422,12 +419,9 @@ func TestZoneSelectorSpreadPriority(t *testing.T) {
}
for _, test := range tests {
-selectorSpread := SelectorSpread{serviceLister: algorithm.FakeServiceLister(test.services), controllerLister: algorithm.FakeControllerLister(test.rcs)}
-m2p, err := predicates.MapPodsToMachines(algorithm.FakePodLister(test.pods))
-if err != nil {
-t.Errorf("unexpected error: %v", err)
-}
-list, err := selectorSpread.CalculateSpreadPriority(test.pod, m2p, algorithm.FakePodLister(test.pods), algorithm.FakeNodeLister(makeLabeledNodeList(labeledNodes)))
+nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods)
+selectorSpread := SelectorSpread{podLister: algorithm.FakePodLister(test.pods), serviceLister: algorithm.FakeServiceLister(test.services), controllerLister: algorithm.FakeControllerLister(test.rcs)}
+list, err := selectorSpread.CalculateSpreadPriority(test.pod, nodeNameToInfo, algorithm.FakeNodeLister(makeLabeledNodeList(labeledNodes)))
if err != nil {
t.Errorf("unexpected error: %v", err)
}
@@ -597,12 +591,9 @@ func TestZoneSpreadPriority(t *testing.T) {
}
for _, test := range tests {
-m2p, err := predicates.MapPodsToMachines(algorithm.FakePodLister(test.pods))
-if err != nil {
-t.Errorf("unexpected error: %v", err)
-}
-zoneSpread := ServiceAntiAffinity{serviceLister: algorithm.FakeServiceLister(test.services), label: "zone"}
-list, err := zoneSpread.CalculateAntiAffinityPriority(test.pod, m2p, algorithm.FakePodLister(test.pods), algorithm.FakeNodeLister(makeLabeledNodeList(test.nodes)))
+nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods)
+zoneSpread := ServiceAntiAffinity{podLister: algorithm.FakePodLister(test.pods), serviceLister: algorithm.FakeServiceLister(test.services), label: "zone"}
+list, err := zoneSpread.CalculateAntiAffinityPriority(test.pod, nodeNameToInfo, algorithm.FakeNodeLister(makeLabeledNodeList(test.nodes)))
if err != nil {
t.Errorf("unexpected error: %v", err)
}
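Because NewSelectorSpreadPriority and NewServiceAntiAffinityPriority now take a PodLister, every call site has to supply one, not just the tests shown here. The registration in the default algorithm provider is not part of these hunks; a hedged sketch of how that wiring would look, assuming the factory's PluginFactoryArgs of this era exposes the listers under the names used below:

// Hedged sketch (assumption; defaults.go and the factory package are outside
// this diff): the default provider passes the scheduler's own listers into the
// updated constructor instead of relying on a per-call PodLister argument.
factory.RegisterPriorityConfigFactory(
	"SelectorSpreadPriority",
	factory.PriorityConfigFactory{
		Function: func(args factory.PluginFactoryArgs) algorithm.PriorityFunction {
			return priorities.NewSelectorSpreadPriority(args.PodLister, args.ServiceLister, args.ControllerLister)
		},
		Weight: 1,
	},
)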