inter-pod affinity prefilter

This commit is contained in:
Abdullah Gharaibeh
2019-12-12 14:03:50 -05:00
parent cfdc365525
commit 2fdf1fa3c1
10 changed files with 486 additions and 2333 deletions

View File

@@ -179,7 +179,8 @@ func (m *serviceAffinityMetadata) clone() *serviceAffinityMetadata {
return &copy
}
type podAffinityMetadata struct {
// PodAffinityMetadata pre-computed state for inter-pod affinity predicate.
type PodAffinityMetadata struct {
// A map of topology pairs to the number of existing pods that has anti-affinity terms that match the "pod".
topologyToMatchedExistingAntiAffinityTerms topologyToMatchedTermCount
// A map of topology pairs to the number of existing pods that match the affinity terms of the "pod".
@@ -225,7 +226,8 @@ func (m topologyToMatchedTermCount) updateWithAntiAffinityTerms(targetPod *v1.Po
}
}
func (m *podAffinityMetadata) updatePod(updatedPod, pod *v1.Pod, node *v1.Node, multiplier int64) error {
// UpdateWithPod updates the metadata counters with the (anti)affinity matches for the given pod.
func (m *PodAffinityMetadata) UpdateWithPod(updatedPod, pod *v1.Pod, node *v1.Node, multiplier int64) error {
if m == nil {
return nil
}
@@ -265,12 +267,13 @@ func (m *podAffinityMetadata) updatePod(updatedPod, pod *v1.Pod, node *v1.Node,
return nil
}
func (m *podAffinityMetadata) clone() *podAffinityMetadata {
// Clone makes a deep copy of PodAffinityMetadata.
func (m *PodAffinityMetadata) Clone() *PodAffinityMetadata {
if m == nil {
return nil
}
copy := podAffinityMetadata{}
copy := PodAffinityMetadata{}
copy.topologyToMatchedAffinityTerms = m.topologyToMatchedAffinityTerms.clone()
copy.topologyToMatchedAntiAffinityTerms = m.topologyToMatchedAntiAffinityTerms.clone()
copy.topologyToMatchedExistingAntiAffinityTerms = m.topologyToMatchedExistingAntiAffinityTerms.clone()
@@ -327,7 +330,6 @@ type predicateMetadata struct {
evenPodsSpreadMetadata *evenPodsSpreadMetadata
serviceAffinityMetadata *serviceAffinityMetadata
podAffinityMetadata *podAffinityMetadata
podFitsResourcesMetadata *podFitsResourcesMetadata
podFitsHostPortsMetadata *podFitsHostPortsMetadata
}
@@ -373,7 +375,6 @@ func (f *MetadataProducerFactory) GetPredicateMetadata(pod *v1.Pod, sharedLister
}
var allNodes []*schedulernodeinfo.NodeInfo
var havePodsWithAffinityNodes []*schedulernodeinfo.NodeInfo
if sharedLister != nil {
var err error
allNodes, err = sharedLister.NodeInfos().List()
@@ -381,12 +382,6 @@ func (f *MetadataProducerFactory) GetPredicateMetadata(pod *v1.Pod, sharedLister
klog.Errorf("failed to list NodeInfos: %v", err)
return nil
}
havePodsWithAffinityNodes, err = sharedLister.NodeInfos().HavePodsWithAffinityList()
if err != nil {
klog.Errorf("failed to list NodeInfos: %v", err)
return nil
}
}
// evenPodsSpreadMetadata represents how existing pods match "pod"
@@ -397,16 +392,9 @@ func (f *MetadataProducerFactory) GetPredicateMetadata(pod *v1.Pod, sharedLister
return nil
}
podAffinityMetadata, err := getPodAffinityMetadata(pod, allNodes, havePodsWithAffinityNodes)
if err != nil {
klog.Errorf("Error calculating podAffinityMetadata: %v", err)
return nil
}
predicateMetadata := &predicateMetadata{
pod: pod,
evenPodsSpreadMetadata: evenPodsSpreadMetadata,
podAffinityMetadata: podAffinityMetadata,
podFitsResourcesMetadata: getPodFitsResourcesMetedata(pod),
podFitsHostPortsMetadata: getPodFitsHostPortsMetadata(pod),
}
@@ -429,7 +417,8 @@ func getPodFitsResourcesMetedata(pod *v1.Pod) *podFitsResourcesMetadata {
}
}
func getPodAffinityMetadata(pod *v1.Pod, allNodes []*schedulernodeinfo.NodeInfo, havePodsWithAffinityNodes []*schedulernodeinfo.NodeInfo) (*podAffinityMetadata, error) {
// GetPodAffinityMetadata computes inter-pod affinity metadata.
func GetPodAffinityMetadata(pod *v1.Pod, allNodes []*schedulernodeinfo.NodeInfo, havePodsWithAffinityNodes []*schedulernodeinfo.NodeInfo) (*PodAffinityMetadata, error) {
// existingPodAntiAffinityMap will be used later for efficient check on existing pods' anti-affinity
existingPodAntiAffinityMap, err := getTPMapMatchingExistingAntiAffinity(pod, havePodsWithAffinityNodes)
if err != nil {
@@ -442,7 +431,7 @@ func getPodAffinityMetadata(pod *v1.Pod, allNodes []*schedulernodeinfo.NodeInfo,
return nil, err
}
return &podAffinityMetadata{
return &PodAffinityMetadata{
topologyToMatchedAffinityTerms: incomingPodAffinityMap,
topologyToMatchedAntiAffinityTerms: incomingPodAntiAffinityMap,
topologyToMatchedExistingAntiAffinityTerms: existingPodAntiAffinityMap,
@@ -619,7 +608,6 @@ func (meta *predicateMetadata) RemovePod(deletedPod *v1.Pod, node *v1.Node) erro
if deletedPodFullName == schedutil.GetPodFullName(meta.pod) {
return fmt.Errorf("deletedPod and meta.pod must not be the same")
}
meta.podAffinityMetadata.updatePod(deletedPod, meta.pod, node, -1)
meta.evenPodsSpreadMetadata.removePod(deletedPod, meta.pod, node)
meta.serviceAffinityMetadata.removePod(deletedPod, node)
@@ -637,9 +625,6 @@ func (meta *predicateMetadata) AddPod(addedPod *v1.Pod, node *v1.Node) error {
return fmt.Errorf("node not found")
}
if err := meta.podAffinityMetadata.updatePod(addedPod, meta.pod, node, 1); err != nil {
return err
}
// Update meta.evenPodsSpreadMetadata if meta.pod has hard spread constraints
// and addedPod matches that
meta.evenPodsSpreadMetadata.addPod(addedPod, meta.pod, node)
@@ -657,7 +642,6 @@ func (meta *predicateMetadata) ShallowCopy() Metadata {
podBestEffort: meta.podBestEffort,
}
newPredMeta.podFitsHostPortsMetadata = meta.podFitsHostPortsMetadata.clone()
newPredMeta.podAffinityMetadata = meta.podAffinityMetadata.clone()
newPredMeta.evenPodsSpreadMetadata = meta.evenPodsSpreadMetadata.clone()
newPredMeta.serviceAffinityMetadata = meta.serviceAffinityMetadata.clone()
newPredMeta.podFitsResourcesMetadata = meta.podFitsResourcesMetadata.clone()

View File

@@ -70,16 +70,7 @@ func predicateMetadataEquivalent(meta1, meta2 *predicateMetadata) error {
for !reflect.DeepEqual(meta1.podFitsHostPortsMetadata.podPorts, meta2.podFitsHostPortsMetadata.podPorts) {
return fmt.Errorf("podPorts are not equal")
}
if !reflect.DeepEqual(meta1.podAffinityMetadata.topologyToMatchedAffinityTerms, meta2.podAffinityMetadata.topologyToMatchedAffinityTerms) {
return fmt.Errorf("topologyToMatchedAffinityTerms are not equal")
}
if !reflect.DeepEqual(meta1.podAffinityMetadata.topologyToMatchedAntiAffinityTerms, meta2.podAffinityMetadata.topologyToMatchedAntiAffinityTerms) {
return fmt.Errorf("topologyToMatchedAntiAffinityTerms are not equal")
}
if !reflect.DeepEqual(meta1.podAffinityMetadata.topologyToMatchedExistingAntiAffinityTerms,
meta2.podAffinityMetadata.topologyToMatchedExistingAntiAffinityTerms) {
return fmt.Errorf("topologyToMatchedExistingAntiAffinityTerms are not equal, got: %v, want: %v", meta1.podAffinityMetadata.topologyToMatchedExistingAntiAffinityTerms, meta2.podAffinityMetadata.topologyToMatchedExistingAntiAffinityTerms)
}
if meta1.serviceAffinityMetadata != nil {
sortablePods1 := sortablePods(meta1.serviceAffinityMetadata.matchingPodList)
sort.Sort(sortablePods1)
@@ -114,78 +105,6 @@ func TestPredicateMetadata_AddRemovePod(t *testing.T) {
"zone": "z21",
}
selector1 := map[string]string{"foo": "bar"}
antiAffinityFooBar := &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "foo",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"bar"},
},
},
},
TopologyKey: "region",
},
},
}
antiAffinityComplex := &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "foo",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"bar", "buzz"},
},
},
},
TopologyKey: "region",
},
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpNotIn,
Values: []string{"bar", "security", "test"},
},
},
},
TopologyKey: "zone",
},
},
}
affinityComplex := &v1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "foo",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"bar", "buzz"},
},
},
},
TopologyKey: "region",
},
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpNotIn,
Values: []string{"bar", "security", "test"},
},
},
},
TopologyKey: "zone",
},
},
}
tests := []struct {
name string
@@ -218,39 +137,6 @@ func TestPredicateMetadata_AddRemovePod(t *testing.T) {
{ObjectMeta: metav1.ObjectMeta{Name: "nodeC", Labels: label3}},
},
},
{
name: "metadata anti-affinity terms are updated correctly after adding and removing a pod",
pendingPod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pending", Labels: selector1},
},
existingPods: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeA"},
},
{ObjectMeta: metav1.ObjectMeta{Name: "p2"},
Spec: v1.PodSpec{
NodeName: "nodeC",
Affinity: &v1.Affinity{
PodAntiAffinity: antiAffinityFooBar,
},
},
},
},
addedPod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "addedPod", Labels: selector1},
Spec: v1.PodSpec{
NodeName: "nodeB",
Affinity: &v1.Affinity{
PodAntiAffinity: antiAffinityFooBar,
},
},
},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: label1}},
{ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: label2}},
{ObjectMeta: metav1.ObjectMeta{Name: "nodeC", Labels: label3}},
},
},
{
name: "metadata service-affinity data are updated correctly after adding and removing a pod",
pendingPod: &v1.Pod{
@@ -275,75 +161,6 @@ func TestPredicateMetadata_AddRemovePod(t *testing.T) {
{ObjectMeta: metav1.ObjectMeta{Name: "nodeC", Labels: label3}},
},
},
{
name: "metadata anti-affinity terms and service affinity data are updated correctly after adding and removing a pod",
pendingPod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pending", Labels: selector1},
},
existingPods: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeA"},
},
{ObjectMeta: metav1.ObjectMeta{Name: "p2"},
Spec: v1.PodSpec{
NodeName: "nodeC",
Affinity: &v1.Affinity{
PodAntiAffinity: antiAffinityFooBar,
},
},
},
},
addedPod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "addedPod", Labels: selector1},
Spec: v1.PodSpec{
NodeName: "nodeA",
Affinity: &v1.Affinity{
PodAntiAffinity: antiAffinityComplex,
},
},
},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: selector1}}},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: label1}},
{ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: label2}},
{ObjectMeta: metav1.ObjectMeta{Name: "nodeC", Labels: label3}},
},
},
{
name: "metadata matching pod affinity and anti-affinity are updated correctly after adding and removing a pod",
pendingPod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pending", Labels: selector1},
},
existingPods: []*v1.Pod{
{ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: selector1},
Spec: v1.PodSpec{NodeName: "nodeA"},
},
{ObjectMeta: metav1.ObjectMeta{Name: "p2"},
Spec: v1.PodSpec{
NodeName: "nodeC",
Affinity: &v1.Affinity{
PodAntiAffinity: antiAffinityFooBar,
PodAffinity: affinityComplex,
},
},
},
},
addedPod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "addedPod", Labels: selector1},
Spec: v1.PodSpec{
NodeName: "nodeA",
Affinity: &v1.Affinity{
PodAntiAffinity: antiAffinityComplex,
},
},
},
services: []*v1.Service{{Spec: v1.ServiceSpec{Selector: selector1}}},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: label1}},
{ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: label2}},
{ObjectMeta: metav1.ObjectMeta{Name: "nodeC", Labels: label3}},
},
},
}
for _, test := range tests {
@@ -385,6 +202,31 @@ func TestPredicateMetadata_AddRemovePod(t *testing.T) {
}
}
func TestPodAffinityMetadata_Clone(t *testing.T) {
source := &PodAffinityMetadata{
topologyToMatchedExistingAntiAffinityTerms: topologyToMatchedTermCount{
{key: "name", value: "machine1"}: 1,
{key: "name", value: "machine2"}: 1,
},
topologyToMatchedAffinityTerms: topologyToMatchedTermCount{
{key: "name", value: "nodeA"}: 1,
{key: "name", value: "nodeC"}: 2,
},
topologyToMatchedAntiAffinityTerms: topologyToMatchedTermCount{
{key: "name", value: "nodeN"}: 3,
{key: "name", value: "nodeM"}: 1,
},
}
clone := source.Clone()
if clone == source {
t.Errorf("Clone returned the exact same object!")
}
if !reflect.DeepEqual(clone, source) {
t.Errorf("Copy is not equal to source!")
}
}
// TestPredicateMetadata_ShallowCopy tests the ShallowCopy function. It is based
// on the idea that shallow-copy should produce an object that is deep-equal to the original
// object.
@@ -415,20 +257,6 @@ func TestPredicateMetadata_ShallowCopy(t *testing.T) {
},
},
},
podAffinityMetadata: &podAffinityMetadata{
topologyToMatchedExistingAntiAffinityTerms: topologyToMatchedTermCount{
{key: "name", value: "machine1"}: 1,
{key: "name", value: "machine2"}: 1,
},
topologyToMatchedAffinityTerms: topologyToMatchedTermCount{
{key: "name", value: "nodeA"}: 1,
{key: "name", value: "nodeC"}: 2,
},
topologyToMatchedAntiAffinityTerms: topologyToMatchedTermCount{
{key: "name", value: "nodeN"}: 3,
{key: "name", value: "nodeM"}: 1,
},
},
evenPodsSpreadMetadata: &evenPodsSpreadMetadata{
tpKeyToCriticalPaths: map[string]*criticalPaths{
"name": {{"nodeA", 1}, {"nodeC", 2}},

View File

@@ -1194,23 +1194,30 @@ type PodAffinityChecker struct {
podLister schedulerlisters.PodLister
}
// NewPodAffinityChecker returns a PodAffinityChecker.
func NewPodAffinityChecker(sharedLister schedulerlisters.SharedLister) *PodAffinityChecker {
return &PodAffinityChecker{
nodeInfoLister: sharedLister.NodeInfos(),
podLister: sharedLister.Pods(),
}
}
// NewPodAffinityPredicate creates a PodAffinityChecker.
func NewPodAffinityPredicate(nodeInfoLister schedulerlisters.NodeInfoLister, podLister schedulerlisters.PodLister) FitPredicate {
checker := &PodAffinityChecker{
nodeInfoLister: nodeInfoLister,
podLister: podLister,
return func(pod *v1.Pod, meta Metadata, nodeInfo *schedulernodeinfo.NodeInfo) (bool, []PredicateFailureReason, error) {
return false, nil, fmt.Errorf("This function should never be called")
}
return checker.InterPodAffinityMatches
}
// InterPodAffinityMatches checks if a pod can be scheduled on the specified node with pod affinity/anti-affinity configuration.
// First return value indicates whether a pod can be scheduled on the specified node while the second return value indicates the
// predicate failure reasons if the pod cannot be scheduled on the specified node.
func (c *PodAffinityChecker) InterPodAffinityMatches(pod *v1.Pod, meta Metadata, nodeInfo *schedulernodeinfo.NodeInfo) (bool, []PredicateFailureReason, error) {
func (c *PodAffinityChecker) InterPodAffinityMatches(pod *v1.Pod, meta *PodAffinityMetadata, nodeInfo *schedulernodeinfo.NodeInfo) (bool, []PredicateFailureReason, error) {
node := nodeInfo.Node()
if node == nil {
return false, nil, fmt.Errorf("node not found")
}
if failedPredicates, error := c.satisfiesExistingPodsAntiAffinity(pod, meta, nodeInfo); failedPredicates != nil {
failedPredicates := append([]PredicateFailureReason{ErrPodAffinityNotMatch}, failedPredicates)
return false, failedPredicates, error
@@ -1341,14 +1348,14 @@ func (c *PodAffinityChecker) getMatchingAntiAffinityTopologyPairsOfPods(pod *v1.
// Checks if scheduling the pod onto this node would break any anti-affinity
// terms indicated by the existing pods.
func (c *PodAffinityChecker) satisfiesExistingPodsAntiAffinity(pod *v1.Pod, meta Metadata, nodeInfo *schedulernodeinfo.NodeInfo) (PredicateFailureReason, error) {
func (c *PodAffinityChecker) satisfiesExistingPodsAntiAffinity(pod *v1.Pod, meta *PodAffinityMetadata, nodeInfo *schedulernodeinfo.NodeInfo) (PredicateFailureReason, error) {
node := nodeInfo.Node()
if node == nil {
return ErrExistingPodsAntiAffinityRulesNotMatch, fmt.Errorf("node not found")
}
var topologyMap topologyToMatchedTermCount
if predicateMeta, ok := meta.(*predicateMetadata); ok {
topologyMap = predicateMeta.podAffinityMetadata.topologyToMatchedExistingAntiAffinityTerms
if meta != nil {
topologyMap = meta.topologyToMatchedExistingAntiAffinityTerms
} else {
// Filter out pods whose nodeName is equal to nodeInfo.node.Name, but are not
// present in nodeInfo. Pods on other nodes pass the filter.
@@ -1416,15 +1423,15 @@ func (c *PodAffinityChecker) nodeMatchesAnyTopologyTerm(pod *v1.Pod, topologyPai
// satisfiesPodsAffinityAntiAffinity checks if scheduling the pod onto this node would break any term of this pod.
func (c *PodAffinityChecker) satisfiesPodsAffinityAntiAffinity(pod *v1.Pod,
meta Metadata, nodeInfo *schedulernodeinfo.NodeInfo,
predicateMeta *PodAffinityMetadata, nodeInfo *schedulernodeinfo.NodeInfo,
affinity *v1.Affinity) (PredicateFailureReason, error) {
node := nodeInfo.Node()
if node == nil {
return ErrPodAffinityRulesNotMatch, fmt.Errorf("node not found")
}
if predicateMeta, ok := meta.(*predicateMetadata); ok {
if predicateMeta != nil {
// Check all affinity terms.
topologyToMatchedAffinityTerms := predicateMeta.podAffinityMetadata.topologyToMatchedAffinityTerms
topologyToMatchedAffinityTerms := predicateMeta.topologyToMatchedAffinityTerms
if affinityTerms := GetPodAffinityTerms(affinity.PodAffinity); len(affinityTerms) > 0 {
matchExists := c.nodeMatchesAllTopologyTerms(pod, topologyToMatchedAffinityTerms, nodeInfo, affinityTerms)
if !matchExists {
@@ -1441,7 +1448,7 @@ func (c *PodAffinityChecker) satisfiesPodsAffinityAntiAffinity(pod *v1.Pod,
}
// Check all anti-affinity terms.
topologyToMatchedAntiAffinityTerms := predicateMeta.podAffinityMetadata.topologyToMatchedAntiAffinityTerms
topologyToMatchedAntiAffinityTerms := predicateMeta.topologyToMatchedAntiAffinityTerms
if antiAffinityTerms := GetPodAntiAffinityTerms(affinity.PodAntiAffinity); len(antiAffinityTerms) > 0 {
matchExists := c.nodeMatchesAnyTopologyTerm(pod, topologyToMatchedAntiAffinityTerms, nodeInfo, antiAffinityTerms)
if matchExists {

File diff suppressed because it is too large Load Diff