inter-pod affinity prefilter

This commit is contained in:
Abdullah Gharaibeh
2019-12-12 14:03:50 -05:00
parent cfdc365525
commit 2fdf1fa3c1
10 changed files with 486 additions and 2333 deletions

View File

@@ -10,9 +10,11 @@ go_library(
"//pkg/scheduler/algorithm/priorities:go_default_library",
"//pkg/scheduler/framework/plugins/migration:go_default_library",
"//pkg/scheduler/framework/v1alpha1:go_default_library",
"//pkg/scheduler/listers:go_default_library",
"//pkg/scheduler/nodeinfo:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//vendor/k8s.io/klog:go_default_library",
],
)

View File

@@ -20,39 +20,126 @@ import (
"context"
"fmt"
v1 "k8s.io/api/core/v1"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/klog"
"k8s.io/kubernetes/pkg/scheduler/algorithm/predicates"
"k8s.io/kubernetes/pkg/scheduler/algorithm/priorities"
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/migration"
framework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1"
schedulerlisters "k8s.io/kubernetes/pkg/scheduler/listers"
"k8s.io/kubernetes/pkg/scheduler/nodeinfo"
)
// InterPodAffinity is a plugin that checks inter pod affinity
type InterPodAffinity struct {
handle framework.FrameworkHandle
predicate predicates.FitPredicate
snapshotSharedLister schedulerlisters.SharedLister
podAffinityChecker *predicates.PodAffinityChecker
}
var _ framework.PreFilterPlugin = &InterPodAffinity{}
var _ framework.FilterPlugin = &InterPodAffinity{}
var _ framework.ScorePlugin = &InterPodAffinity{}
// Name is the name of the plugin used in the plugin registry and configurations.
const Name = "InterPodAffinity"
const (
// Name is the name of the plugin used in the plugin registry and configurations.
Name = "InterPodAffinity"
// preFilterStateKey is the key in CycleState to InterPodAffinity pre-computed data.
// Using the name of the plugin will likely help us avoid collisions with other plugins.
preFilterStateKey = "PreFilter" + Name
)
// preFilterState computed at PreFilter and used at Filter.
type preFilterState struct {
meta *predicates.PodAffinityMetadata
}
// Clone the prefilter state.
func (s *preFilterState) Clone() framework.StateData {
copy := &preFilterState{
meta: s.meta.Clone(),
}
return copy
}
// Name returns name of the plugin. It is used in logs, etc.
func (pl *InterPodAffinity) Name() string {
return Name
}
// PreFilter invoked at the prefilter extension point.
func (pl *InterPodAffinity) PreFilter(ctx context.Context, cycleState *framework.CycleState, pod *v1.Pod) *framework.Status {
var meta *predicates.PodAffinityMetadata
var allNodes []*nodeinfo.NodeInfo
var havePodsWithAffinityNodes []*nodeinfo.NodeInfo
var err error
if allNodes, err = pl.snapshotSharedLister.NodeInfos().List(); err != nil {
return framework.NewStatus(framework.Error, fmt.Sprintf("failed to list NodeInfos: %v", err))
}
if havePodsWithAffinityNodes, err = pl.snapshotSharedLister.NodeInfos().HavePodsWithAffinityList(); err != nil {
return framework.NewStatus(framework.Error, fmt.Sprintf("failed to list NodeInfos with pods with affinity: %v", err))
}
if meta, err = predicates.GetPodAffinityMetadata(pod, allNodes, havePodsWithAffinityNodes); err != nil {
return framework.NewStatus(framework.Error, fmt.Sprintf("Error calculating podAffinityMetadata: %v", err))
}
s := &preFilterState{
meta: meta,
}
cycleState.Write(preFilterStateKey, s)
return nil
}
// PreFilterExtensions returns prefilter extensions, pod add and remove.
func (pl *InterPodAffinity) PreFilterExtensions() framework.PreFilterExtensions {
return pl
}
// AddPod from pre-computed data in cycleState.
func (pl *InterPodAffinity) AddPod(ctx context.Context, cycleState *framework.CycleState, podToSchedule *v1.Pod, podToAdd *v1.Pod, nodeInfo *nodeinfo.NodeInfo) *framework.Status {
meta, err := getPodAffinityMetadata(cycleState)
if err != nil {
return framework.NewStatus(framework.Error, err.Error())
}
meta.UpdateWithPod(podToAdd, podToSchedule, nodeInfo.Node(), 1)
return nil
}
// RemovePod from pre-computed data in cycleState.
func (pl *InterPodAffinity) RemovePod(ctx context.Context, cycleState *framework.CycleState, podToSchedule *v1.Pod, podToRemove *v1.Pod, nodeInfo *nodeinfo.NodeInfo) *framework.Status {
meta, err := getPodAffinityMetadata(cycleState)
if err != nil {
return framework.NewStatus(framework.Error, err.Error())
}
meta.UpdateWithPod(podToRemove, podToSchedule, nodeInfo.Node(), -1)
return nil
}
func getPodAffinityMetadata(cycleState *framework.CycleState) (*predicates.PodAffinityMetadata, error) {
c, err := cycleState.Read(preFilterStateKey)
if err != nil {
// The metadata wasn't pre-computed in prefilter. We ignore the error for now since
// Filter is able to handle that by computing it again.
klog.Error(err)
return nil, nil
}
s, ok := c.(*preFilterState)
if !ok {
return nil, fmt.Errorf("%+v convert to interpodaffinity.state error", c)
}
return s.meta, nil
}
// Filter invoked at the filter extension point.
func (pl *InterPodAffinity) Filter(ctx context.Context, cycleState *framework.CycleState, pod *v1.Pod, nodeInfo *nodeinfo.NodeInfo) *framework.Status {
meta, ok := migration.CovertStateRefToPredMeta(migration.PredicateMetadata(cycleState))
if !ok {
return migration.ErrorToFrameworkStatus(fmt.Errorf("%+v convert to predicates.Metadata error", cycleState))
meta, err := getPodAffinityMetadata(cycleState)
if err != nil {
return framework.NewStatus(framework.Error, err.Error())
}
_, reasons, err := pl.predicate(pod, meta, nodeInfo)
_, reasons, err := pl.podAffinityChecker.InterPodAffinityMatches(pod, meta, nodeInfo)
return migration.PredicateResultToFrameworkStatus(reasons, err)
}
@@ -60,7 +147,7 @@ func (pl *InterPodAffinity) Filter(ctx context.Context, cycleState *framework.Cy
// The "score" returned in this function is the matching number of pods on the `nodeName`,
// it is normalized later.
func (pl *InterPodAffinity) Score(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) (int64, *framework.Status) {
nodeInfo, err := pl.handle.SnapshotSharedLister().NodeInfos().Get(nodeName)
nodeInfo, err := pl.snapshotSharedLister.NodeInfos().Get(nodeName)
if err != nil {
return 0, framework.NewStatus(framework.Error, fmt.Sprintf("getting node %q from Snapshot: %v", nodeName, err))
}
@@ -73,7 +160,7 @@ func (pl *InterPodAffinity) Score(ctx context.Context, state *framework.CycleSta
// NormalizeScore invoked after scoring all nodes.
func (pl *InterPodAffinity) NormalizeScore(ctx context.Context, state *framework.CycleState, pod *v1.Pod, scores framework.NodeScoreList) *framework.Status {
meta := migration.PriorityMetadata(state)
err := priorities.CalculateInterPodAffinityPriorityReduce(pod, meta, pl.handle.SnapshotSharedLister(), scores)
err := priorities.CalculateInterPodAffinityPriorityReduce(pod, meta, pl.snapshotSharedLister, scores)
return migration.ErrorToFrameworkStatus(err)
}
@@ -84,8 +171,11 @@ func (pl *InterPodAffinity) ScoreExtensions() framework.ScoreExtensions {
// New initializes a new plugin and returns it.
func New(_ *runtime.Unknown, h framework.FrameworkHandle) (framework.Plugin, error) {
if h.SnapshotSharedLister() == nil {
return nil, fmt.Errorf("SnapshotSharedlister is nil")
}
return &InterPodAffinity{
handle: h,
predicate: predicates.NewPodAffinityPredicate(h.SnapshotSharedLister().NodeInfos(), h.SnapshotSharedLister().Pods()),
snapshotSharedLister: h.SnapshotSharedLister(),
podAffinityChecker: predicates.NewPodAffinityChecker(h.SnapshotSharedLister()),
}, nil
}

View File

@@ -21,7 +21,7 @@ import (
"reflect"
"testing"
v1 "k8s.io/api/core/v1"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/informers"
clientsetfake "k8s.io/client-go/kubernetes/fake"
@@ -782,13 +782,14 @@ func TestSingleNode(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
snapshot := nodeinfosnapshot.NewSnapshot(nodeinfosnapshot.CreateNodeInfoMap(test.pods, []*v1.Node{test.node}))
factory := &predicates.MetadataProducerFactory{}
meta := factory.GetPredicateMetadata(test.pod, snapshot)
state := framework.NewCycleState()
state.Write(migration.PredicatesStateKey, &migration.PredicatesStateData{Reference: meta})
p := &InterPodAffinity{
predicate: predicates.NewPodAffinityPredicate(snapshot.NodeInfos(), snapshot.Pods()),
snapshotSharedLister: snapshot,
podAffinityChecker: predicates.NewPodAffinityChecker(snapshot),
}
state := framework.NewCycleState()
preFilterStatus := p.PreFilter(context.Background(), state, test.pod)
if !preFilterStatus.IsSuccess() {
t.Errorf("prefilter failed with status: %v", preFilterStatus)
}
gotStatus := p.Filter(context.Background(), state, test.pod, snapshot.NodeInfoMap[test.node.Name])
if !reflect.DeepEqual(gotStatus, test.wantStatus) {
@@ -1619,13 +1620,14 @@ func TestMultipleNodes(t *testing.T) {
t.Run(test.name, func(t *testing.T) {
snapshot := nodeinfosnapshot.NewSnapshot(nodeinfosnapshot.CreateNodeInfoMap(test.pods, test.nodes))
for indexNode, node := range test.nodes {
factory := &predicates.MetadataProducerFactory{}
meta := factory.GetPredicateMetadata(test.pod, snapshot)
state := framework.NewCycleState()
state.Write(migration.PredicatesStateKey, &migration.PredicatesStateData{Reference: meta})
p := &InterPodAffinity{
predicate: predicates.NewPodAffinityPredicate(snapshot.NodeInfos(), snapshot.Pods()),
snapshotSharedLister: snapshot,
podAffinityChecker: predicates.NewPodAffinityChecker(snapshot),
}
state := framework.NewCycleState()
preFilterStatus := p.PreFilter(context.Background(), state, test.pod)
if !preFilterStatus.IsSuccess() {
t.Errorf("prefilter failed with status: %v", preFilterStatus)
}
gotStatus := p.Filter(context.Background(), state, test.pod, snapshot.NodeInfoMap[node.Name])
if !reflect.DeepEqual(gotStatus, test.wantStatuses[indexNode]) {
@@ -2279,3 +2281,277 @@ func TestHardPodAffinitySymmetricWeight(t *testing.T) {
})
}
}
func TestStateAddRemovePod(t *testing.T) {
var label1 = map[string]string{
"region": "r1",
"zone": "z11",
}
var label2 = map[string]string{
"region": "r1",
"zone": "z12",
}
var label3 = map[string]string{
"region": "r2",
"zone": "z21",
}
selector1 := map[string]string{"foo": "bar"}
antiAffinityFooBar := &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "foo",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"bar"},
},
},
},
TopologyKey: "region",
},
},
}
antiAffinityComplex := &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "foo",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"bar", "buzz"},
},
},
},
TopologyKey: "region",
},
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpNotIn,
Values: []string{"bar", "security", "test"},
},
},
},
TopologyKey: "zone",
},
},
}
affinityComplex := &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "foo",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"bar", "buzz"},
},
},
},
TopologyKey: "region",
},
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpNotIn,
Values: []string{"bar", "security", "test"},
},
},
},
TopologyKey: "zone",
},
},
}
tests := []struct {
name string
pendingPod *v1.Pod
addedPod *v1.Pod
existingPods []*v1.Pod
nodes []*v1.Node
services []*v1.Service
}{
{
name: "no affinity exist",
pendingPod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pending", Labels: selector1},
},
existingPods: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeA"},
},
{ObjectMeta: metav1.ObjectMeta{Name: "p2"},
Spec: v1.PodSpec{NodeName: "nodeC"},
},
},
addedPod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "addedPod", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeB"},
},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: label1}},
{ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: label2}},
{ObjectMeta: metav1.ObjectMeta{Name: "nodeC", Labels: label3}},
},
},
{
name: "metadata anti-affinity terms are updated correctly after adding and removing a pod",
pendingPod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pending", Labels: selector1},
},
existingPods: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeA"},
},
{ObjectMeta: metav1.ObjectMeta{Name: "p2"},
Spec: v1.PodSpec{
NodeName: "nodeC",
Affinity: &v1.Affinity{
PodAntiAffinity: antiAffinityFooBar,
},
},
},
},
addedPod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "addedPod", Labels: selector1},
Spec: v1.PodSpec{
NodeName: "nodeB",
Affinity: &v1.Affinity{
PodAntiAffinity: antiAffinityFooBar,
},
},
},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: label1}},
{ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: label2}},
{ObjectMeta: metav1.ObjectMeta{Name: "nodeC", Labels: label3}},
},
},
{
name: "metadata anti-affinity terms are updated correctly after adding and removing a pod",
pendingPod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pending", Labels: selector1},
},
existingPods: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeA"},
},
{ObjectMeta: metav1.ObjectMeta{Name: "p2"},
Spec: v1.PodSpec{
NodeName: "nodeC",
Affinity: &v1.Affinity{
PodAntiAffinity: antiAffinityFooBar,
},
},
},
},
addedPod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "addedPod", Labels: selector1},
Spec: v1.PodSpec{
NodeName: "nodeA",
Affinity: &v1.Affinity{
PodAntiAffinity: antiAffinityComplex,
},
},
},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: selector1}}},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: label1}},
{ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: label2}},
{ObjectMeta: metav1.ObjectMeta{Name: "nodeC", Labels: label3}},
},
},
{
name: "metadata matching pod affinity and anti-affinity are updated correctly after adding and removing a pod",
pendingPod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pending", Labels: selector1},
},
existingPods: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeA"},
},
{ObjectMeta: metav1.ObjectMeta{Name: "p2"},
Spec: v1.PodSpec{
NodeName: "nodeC",
Affinity: &v1.Affinity{
PodAntiAffinity: antiAffinityFooBar,
PodAffinity: affinityComplex,
},
},
},
},
addedPod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "addedPod", Labels: selector1},
Spec: v1.PodSpec{
NodeName: "nodeA",
Affinity: &v1.Affinity{
PodAntiAffinity: antiAffinityComplex,
},
},
},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: selector1}}},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: label1}},
{ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: label2}},
{ObjectMeta: metav1.ObjectMeta{Name: "nodeC", Labels: label3}},
},
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
// getMeta creates predicate meta data given the list of pods.
getState := func(pods []*v1.Pod) (*InterPodAffinity, *framework.CycleState, *predicates.PodAffinityMetadata, *nodeinfosnapshot.Snapshot) {
snapshot := nodeinfosnapshot.NewSnapshot(nodeinfosnapshot.CreateNodeInfoMap(pods, test.nodes))
p := &InterPodAffinity{
snapshotSharedLister: snapshot,
podAffinityChecker: predicates.NewPodAffinityChecker(snapshot),
}
cycleState := framework.NewCycleState()
preFilterStatus := p.PreFilter(context.Background(), cycleState, test.pendingPod)
if !preFilterStatus.IsSuccess() {
t.Errorf("prefilter failed with status: %v", preFilterStatus)
}
meta, err := getPodAffinityMetadata(cycleState)
if err != nil {
t.Errorf("failed to get metadata from cycleState: %v", err)
}
return p, cycleState, meta, snapshot
}
// allPodsState is the state produced when all pods, including test.addedPod are given to prefilter.
_, _, allPodsMeta, _ := getState(append(test.existingPods, test.addedPod))
// state is produced for test.existingPods (without test.addedPod).
ipa, state, meta, snapshot := getState(test.existingPods)
// clone the state so that we can compare it later when performing Remove.
originalMeta := meta.Clone()
// Add test.addedPod to state1 and verify it is equal to allPodsState.
if err := ipa.AddPod(context.Background(), state, test.pendingPod, test.addedPod, snapshot.NodeInfoMap[test.addedPod.Spec.NodeName]); err != nil {
t.Errorf("error adding pod to meta: %v", err)
}
if !reflect.DeepEqual(allPodsMeta, meta) {
t.Errorf("State is not equal, got: %v, want: %v", meta, allPodsMeta)
}
// Remove the added pod pod and make sure it is equal to the original state.
if err := ipa.RemovePod(context.Background(), state, test.pendingPod, test.addedPod, snapshot.NodeInfoMap[test.addedPod.Spec.NodeName]); err != nil {
t.Errorf("error removing pod from meta: %v", err)
}
if !reflect.DeepEqual(originalMeta, meta) {
t.Errorf("State is not equal, got: %v, want: %v", meta, originalMeta)
}
})
}
}