Add MinDomains API to TopologySpreadConstraints field

sanposhiho 2022-02-23 21:11:59 +09:00
parent a41f9e976d
commit 3b13e9445a
92 changed files with 1576 additions and 1025 deletions

View File

@ -10524,12 +10524,17 @@
"description": "LabelSelector is used to find matching pods. Pods that match this label selector are counted to determine the number of pods in their corresponding topology domain."
},
"maxSkew": {
"description": "MaxSkew describes the degree to which pods may be unevenly distributed. When `whenUnsatisfiable=DoNotSchedule`, it is the maximum permitted difference between the number of matching pods in the target topology and the global minimum. For example, in a 3-zone cluster, MaxSkew is set to 1, and pods with the same labelSelector spread as 1/1/0: | zone1 | zone2 | zone3 | | P | P | | - if MaxSkew is 1, incoming pod can only be scheduled to zone3 to become 1/1/1; scheduling it onto zone1(zone2) would make the ActualSkew(2-0) on zone1(zone2) violate MaxSkew(1). - if MaxSkew is 2, incoming pod can be scheduled onto any zone. When `whenUnsatisfiable=ScheduleAnyway`, it is used to give higher precedence to topologies that satisfy it. It's a required field. Default value is 1 and 0 is not allowed.",
"description": "MaxSkew describes the degree to which pods may be unevenly distributed. When `whenUnsatisfiable=DoNotSchedule`, it is the maximum permitted difference between the number of matching pods in the target topology and the global minimum. The global minimum is the minimum number of matching pods in an eligible domain or zero if the number of eligible domains is less than MinDomains. For example, in a 3-zone cluster, MaxSkew is set to 1, and pods with the same labelSelector spread as 2/2/1: In this case, the global minimum is 1. | zone1 | zone2 | zone3 | | P P | P P | P | - if MaxSkew is 1, incoming pod can only be scheduled to zone3 to become 2/2/2; scheduling it onto zone1(zone2) would make the ActualSkew(3-1) on zone1(zone2) violate MaxSkew(1). - if MaxSkew is 2, incoming pod can be scheduled onto any zone. When `whenUnsatisfiable=ScheduleAnyway`, it is used to give higher precedence to topologies that satisfy it. It's a required field. Default value is 1 and 0 is not allowed.",
"format": "int32",
"type": "integer"
},
"minDomains": {
"description": "MinDomains indicates a minimum number of eligible domains. When the number of eligible domains with matching topology keys is less than minDomains, Pod Topology Spread treats \"global minimum\" as 0, and then the calculation of Skew is performed. And when the number of eligible domains with matching topology keys equals or greater than minDomains, this value has no effect on scheduling. As a result, when the number of eligible domains is less than minDomains, scheduler won't schedule more than maxSkew Pods to those domains. If value is nil, the constraint behaves as if MinDomains is equal to 1. Valid values are integers greater than 0. When value is not nil, WhenUnsatisfiable must be DoNotSchedule.\n\nFor example, in a 3-zone cluster, MaxSkew is set to 2, MinDomains is set to 5 and pods with the same labelSelector spread as 2/2/2: | zone1 | zone2 | zone3 | | P P | P P | P P | The number of domains is less than 5(MinDomains), so \"global minimum\" is treated as 0. In this situation, new pod with the same labelSelector cannot be scheduled, because computed skew will be 3(3 - 0) if new Pod is scheduled to any of the three zones, it will violate MaxSkew.\n\nThis is an alpha field and requires enabling MinDomainsInPodTopologySpread feature gate.",
"format": "int32",
"type": "integer"
},
"topologyKey": {
"description": "TopologyKey is the key of node labels. Nodes that have a label with this key and identical values are considered to be in the same topology. We consider each <key, value> as a \"bucket\", and try to put balanced number of pods into each bucket. It's a required field.",
"description": "TopologyKey is the key of node labels. Nodes that have a label with this key and identical values are considered to be in the same topology. We consider each <key, value> as a \"bucket\", and try to put balanced number of pods into each bucket. We define a domain as a particular instance of a topology. Also, we define an eligible domain as a domain whose nodes match the node selector. e.g. If TopologyKey is \"kubernetes.io/hostname\", each Node is a domain of that topology. And, if TopologyKey is \"topology.kubernetes.io/zone\", each zone is a domain of that topology. It's a required field.",
"type": "string"
},
"whenUnsatisfiable": {

View File

@ -6226,13 +6226,18 @@
},
"maxSkew": {
"default": 0,
"description": "MaxSkew describes the degree to which pods may be unevenly distributed. When `whenUnsatisfiable=DoNotSchedule`, it is the maximum permitted difference between the number of matching pods in the target topology and the global minimum. For example, in a 3-zone cluster, MaxSkew is set to 1, and pods with the same labelSelector spread as 1/1/0: | zone1 | zone2 | zone3 | | P | P | | - if MaxSkew is 1, incoming pod can only be scheduled to zone3 to become 1/1/1; scheduling it onto zone1(zone2) would make the ActualSkew(2-0) on zone1(zone2) violate MaxSkew(1). - if MaxSkew is 2, incoming pod can be scheduled onto any zone. When `whenUnsatisfiable=ScheduleAnyway`, it is used to give higher precedence to topologies that satisfy it. It's a required field. Default value is 1 and 0 is not allowed.",
"description": "MaxSkew describes the degree to which pods may be unevenly distributed. When `whenUnsatisfiable=DoNotSchedule`, it is the maximum permitted difference between the number of matching pods in the target topology and the global minimum. The global minimum is the minimum number of matching pods in an eligible domain or zero if the number of eligible domains is less than MinDomains. For example, in a 3-zone cluster, MaxSkew is set to 1, and pods with the same labelSelector spread as 2/2/1: In this case, the global minimum is 1. | zone1 | zone2 | zone3 | | P P | P P | P | - if MaxSkew is 1, incoming pod can only be scheduled to zone3 to become 2/2/2; scheduling it onto zone1(zone2) would make the ActualSkew(3-1) on zone1(zone2) violate MaxSkew(1). - if MaxSkew is 2, incoming pod can be scheduled onto any zone. When `whenUnsatisfiable=ScheduleAnyway`, it is used to give higher precedence to topologies that satisfy it. It's a required field. Default value is 1 and 0 is not allowed.",
"format": "int32",
"type": "integer"
},
"minDomains": {
"description": "MinDomains indicates a minimum number of eligible domains. When the number of eligible domains with matching topology keys is less than minDomains, Pod Topology Spread treats \"global minimum\" as 0, and then the calculation of Skew is performed. And when the number of eligible domains with matching topology keys equals or greater than minDomains, this value has no effect on scheduling. As a result, when the number of eligible domains is less than minDomains, scheduler won't schedule more than maxSkew Pods to those domains. If value is nil, the constraint behaves as if MinDomains is equal to 1. Valid values are integers greater than 0. When value is not nil, WhenUnsatisfiable must be DoNotSchedule.\n\nFor example, in a 3-zone cluster, MaxSkew is set to 2, MinDomains is set to 5 and pods with the same labelSelector spread as 2/2/2: | zone1 | zone2 | zone3 | | P P | P P | P P | The number of domains is less than 5(MinDomains), so \"global minimum\" is treated as 0. In this situation, new pod with the same labelSelector cannot be scheduled, because computed skew will be 3(3 - 0) if new Pod is scheduled to any of the three zones, it will violate MaxSkew.\n\nThis is an alpha field and requires enabling MinDomainsInPodTopologySpread feature gate.",
"format": "int32",
"type": "integer"
},
"topologyKey": {
"default": "",
"description": "TopologyKey is the key of node labels. Nodes that have a label with this key and identical values are considered to be in the same topology. We consider each <key, value> as a \"bucket\", and try to put balanced number of pods into each bucket. It's a required field.",
"description": "TopologyKey is the key of node labels. Nodes that have a label with this key and identical values are considered to be in the same topology. We consider each <key, value> as a \"bucket\", and try to put balanced number of pods into each bucket. We define a domain as a particular instance of a topology. Also, we define an eligible domain as a domain whose nodes match the node selector. e.g. If TopologyKey is \"kubernetes.io/hostname\", each Node is a domain of that topology. And, if TopologyKey is \"topology.kubernetes.io/zone\", each zone is a domain of that topology. It's a required field.",
"type": "string"
},
"whenUnsatisfiable": {

View File

@ -3739,13 +3739,18 @@
},
"maxSkew": {
"default": 0,
"description": "MaxSkew describes the degree to which pods may be unevenly distributed. When `whenUnsatisfiable=DoNotSchedule`, it is the maximum permitted difference between the number of matching pods in the target topology and the global minimum. For example, in a 3-zone cluster, MaxSkew is set to 1, and pods with the same labelSelector spread as 1/1/0: | zone1 | zone2 | zone3 | | P | P | | - if MaxSkew is 1, incoming pod can only be scheduled to zone3 to become 1/1/1; scheduling it onto zone1(zone2) would make the ActualSkew(2-0) on zone1(zone2) violate MaxSkew(1). - if MaxSkew is 2, incoming pod can be scheduled onto any zone. When `whenUnsatisfiable=ScheduleAnyway`, it is used to give higher precedence to topologies that satisfy it. It's a required field. Default value is 1 and 0 is not allowed.",
"description": "MaxSkew describes the degree to which pods may be unevenly distributed. When `whenUnsatisfiable=DoNotSchedule`, it is the maximum permitted difference between the number of matching pods in the target topology and the global minimum. The global minimum is the minimum number of matching pods in an eligible domain or zero if the number of eligible domains is less than MinDomains. For example, in a 3-zone cluster, MaxSkew is set to 1, and pods with the same labelSelector spread as 2/2/1: In this case, the global minimum is 1. | zone1 | zone2 | zone3 | | P P | P P | P | - if MaxSkew is 1, incoming pod can only be scheduled to zone3 to become 2/2/2; scheduling it onto zone1(zone2) would make the ActualSkew(3-1) on zone1(zone2) violate MaxSkew(1). - if MaxSkew is 2, incoming pod can be scheduled onto any zone. When `whenUnsatisfiable=ScheduleAnyway`, it is used to give higher precedence to topologies that satisfy it. It's a required field. Default value is 1 and 0 is not allowed.",
"format": "int32",
"type": "integer"
},
"minDomains": {
"description": "MinDomains indicates a minimum number of eligible domains. When the number of eligible domains with matching topology keys is less than minDomains, Pod Topology Spread treats \"global minimum\" as 0, and then the calculation of Skew is performed. And when the number of eligible domains with matching topology keys equals or greater than minDomains, this value has no effect on scheduling. As a result, when the number of eligible domains is less than minDomains, scheduler won't schedule more than maxSkew Pods to those domains. If value is nil, the constraint behaves as if MinDomains is equal to 1. Valid values are integers greater than 0. When value is not nil, WhenUnsatisfiable must be DoNotSchedule.\n\nFor example, in a 3-zone cluster, MaxSkew is set to 2, MinDomains is set to 5 and pods with the same labelSelector spread as 2/2/2: | zone1 | zone2 | zone3 | | P P | P P | P P | The number of domains is less than 5(MinDomains), so \"global minimum\" is treated as 0. In this situation, new pod with the same labelSelector cannot be scheduled, because computed skew will be 3(3 - 0) if new Pod is scheduled to any of the three zones, it will violate MaxSkew.\n\nThis is an alpha field and requires enabling MinDomainsInPodTopologySpread feature gate.",
"format": "int32",
"type": "integer"
},
"topologyKey": {
"default": "",
"description": "TopologyKey is the key of node labels. Nodes that have a label with this key and identical values are considered to be in the same topology. We consider each <key, value> as a \"bucket\", and try to put balanced number of pods into each bucket. It's a required field.",
"description": "TopologyKey is the key of node labels. Nodes that have a label with this key and identical values are considered to be in the same topology. We consider each <key, value> as a \"bucket\", and try to put balanced number of pods into each bucket. We define a domain as a particular instance of a topology. Also, we define an eligible domain as a domain whose nodes match the node selector. e.g. If TopologyKey is \"kubernetes.io/hostname\", each Node is a domain of that topology. And, if TopologyKey is \"topology.kubernetes.io/zone\", each zone is a domain of that topology. It's a required field.",
"type": "string"
},
"whenUnsatisfiable": {

View File

@ -2965,13 +2965,18 @@
},
"maxSkew": {
"default": 0,
"description": "MaxSkew describes the degree to which pods may be unevenly distributed. When `whenUnsatisfiable=DoNotSchedule`, it is the maximum permitted difference between the number of matching pods in the target topology and the global minimum. For example, in a 3-zone cluster, MaxSkew is set to 1, and pods with the same labelSelector spread as 1/1/0: | zone1 | zone2 | zone3 | | P | P | | - if MaxSkew is 1, incoming pod can only be scheduled to zone3 to become 1/1/1; scheduling it onto zone1(zone2) would make the ActualSkew(2-0) on zone1(zone2) violate MaxSkew(1). - if MaxSkew is 2, incoming pod can be scheduled onto any zone. When `whenUnsatisfiable=ScheduleAnyway`, it is used to give higher precedence to topologies that satisfy it. It's a required field. Default value is 1 and 0 is not allowed.",
"description": "MaxSkew describes the degree to which pods may be unevenly distributed. When `whenUnsatisfiable=DoNotSchedule`, it is the maximum permitted difference between the number of matching pods in the target topology and the global minimum. The global minimum is the minimum number of matching pods in an eligible domain or zero if the number of eligible domains is less than MinDomains. For example, in a 3-zone cluster, MaxSkew is set to 1, and pods with the same labelSelector spread as 2/2/1: In this case, the global minimum is 1. | zone1 | zone2 | zone3 | | P P | P P | P | - if MaxSkew is 1, incoming pod can only be scheduled to zone3 to become 2/2/2; scheduling it onto zone1(zone2) would make the ActualSkew(3-1) on zone1(zone2) violate MaxSkew(1). - if MaxSkew is 2, incoming pod can be scheduled onto any zone. When `whenUnsatisfiable=ScheduleAnyway`, it is used to give higher precedence to topologies that satisfy it. It's a required field. Default value is 1 and 0 is not allowed.",
"format": "int32",
"type": "integer"
},
"minDomains": {
"description": "MinDomains indicates a minimum number of eligible domains. When the number of eligible domains with matching topology keys is less than minDomains, Pod Topology Spread treats \"global minimum\" as 0, and then the calculation of Skew is performed. And when the number of eligible domains with matching topology keys equals or greater than minDomains, this value has no effect on scheduling. As a result, when the number of eligible domains is less than minDomains, scheduler won't schedule more than maxSkew Pods to those domains. If value is nil, the constraint behaves as if MinDomains is equal to 1. Valid values are integers greater than 0. When value is not nil, WhenUnsatisfiable must be DoNotSchedule.\n\nFor example, in a 3-zone cluster, MaxSkew is set to 2, MinDomains is set to 5 and pods with the same labelSelector spread as 2/2/2: | zone1 | zone2 | zone3 | | P P | P P | P P | The number of domains is less than 5(MinDomains), so \"global minimum\" is treated as 0. In this situation, new pod with the same labelSelector cannot be scheduled, because computed skew will be 3(3 - 0) if new Pod is scheduled to any of the three zones, it will violate MaxSkew.\n\nThis is an alpha field and requires enabling MinDomainsInPodTopologySpread feature gate.",
"format": "int32",
"type": "integer"
},
"topologyKey": {
"default": "",
"description": "TopologyKey is the key of node labels. Nodes that have a label with this key and identical values are considered to be in the same topology. We consider each <key, value> as a \"bucket\", and try to put balanced number of pods into each bucket. It's a required field.",
"description": "TopologyKey is the key of node labels. Nodes that have a label with this key and identical values are considered to be in the same topology. We consider each <key, value> as a \"bucket\", and try to put balanced number of pods into each bucket. We define a domain as a particular instance of a topology. Also, we define an eligible domain as a domain whose nodes match the node selector. e.g. If TopologyKey is \"kubernetes.io/hostname\", each Node is a domain of that topology. And, if TopologyKey is \"topology.kubernetes.io/zone\", each zone is a domain of that topology. It's a required field.",
"type": "string"
},
"whenUnsatisfiable": {

View File

@ -2767,13 +2767,18 @@
},
"maxSkew": {
"default": 0,
"description": "MaxSkew describes the degree to which pods may be unevenly distributed. When `whenUnsatisfiable=DoNotSchedule`, it is the maximum permitted difference between the number of matching pods in the target topology and the global minimum. For example, in a 3-zone cluster, MaxSkew is set to 1, and pods with the same labelSelector spread as 1/1/0: | zone1 | zone2 | zone3 | | P | P | | - if MaxSkew is 1, incoming pod can only be scheduled to zone3 to become 1/1/1; scheduling it onto zone1(zone2) would make the ActualSkew(2-0) on zone1(zone2) violate MaxSkew(1). - if MaxSkew is 2, incoming pod can be scheduled onto any zone. When `whenUnsatisfiable=ScheduleAnyway`, it is used to give higher precedence to topologies that satisfy it. It's a required field. Default value is 1 and 0 is not allowed.",
"description": "MaxSkew describes the degree to which pods may be unevenly distributed. When `whenUnsatisfiable=DoNotSchedule`, it is the maximum permitted difference between the number of matching pods in the target topology and the global minimum. The global minimum is the minimum number of matching pods in an eligible domain or zero if the number of eligible domains is less than MinDomains. For example, in a 3-zone cluster, MaxSkew is set to 1, and pods with the same labelSelector spread as 2/2/1: In this case, the global minimum is 1. | zone1 | zone2 | zone3 | | P P | P P | P | - if MaxSkew is 1, incoming pod can only be scheduled to zone3 to become 2/2/2; scheduling it onto zone1(zone2) would make the ActualSkew(3-1) on zone1(zone2) violate MaxSkew(1). - if MaxSkew is 2, incoming pod can be scheduled onto any zone. When `whenUnsatisfiable=ScheduleAnyway`, it is used to give higher precedence to topologies that satisfy it. It's a required field. Default value is 1 and 0 is not allowed.",
"format": "int32",
"type": "integer"
},
"minDomains": {
"description": "MinDomains indicates a minimum number of eligible domains. When the number of eligible domains with matching topology keys is less than minDomains, Pod Topology Spread treats \"global minimum\" as 0, and then the calculation of Skew is performed. And when the number of eligible domains with matching topology keys equals or greater than minDomains, this value has no effect on scheduling. As a result, when the number of eligible domains is less than minDomains, scheduler won't schedule more than maxSkew Pods to those domains. If value is nil, the constraint behaves as if MinDomains is equal to 1. Valid values are integers greater than 0. When value is not nil, WhenUnsatisfiable must be DoNotSchedule.\n\nFor example, in a 3-zone cluster, MaxSkew is set to 2, MinDomains is set to 5 and pods with the same labelSelector spread as 2/2/2: | zone1 | zone2 | zone3 | | P P | P P | P P | The number of domains is less than 5(MinDomains), so \"global minimum\" is treated as 0. In this situation, new pod with the same labelSelector cannot be scheduled, because computed skew will be 3(3 - 0) if new Pod is scheduled to any of the three zones, it will violate MaxSkew.\n\nThis is an alpha field and requires enabling MinDomainsInPodTopologySpread feature gate.",
"format": "int32",
"type": "integer"
},
"topologyKey": {
"default": "",
"description": "TopologyKey is the key of node labels. Nodes that have a label with this key and identical values are considered to be in the same topology. We consider each <key, value> as a \"bucket\", and try to put balanced number of pods into each bucket. It's a required field.",
"description": "TopologyKey is the key of node labels. Nodes that have a label with this key and identical values are considered to be in the same topology. We consider each <key, value> as a \"bucket\", and try to put balanced number of pods into each bucket. We define a domain as a particular instance of a topology. Also, we define an eligible domain as a domain whose nodes match the node selector. e.g. If TopologyKey is \"kubernetes.io/hostname\", each Node is a domain of that topology. And, if TopologyKey is \"topology.kubernetes.io/zone\", each zone is a domain of that topology. It's a required field.",
"type": "string"
},
"whenUnsatisfiable": {

View File

@ -583,6 +583,35 @@ func dropDisabledFields(
if !utilfeature.DefaultFeatureGate.Enabled(features.IdentifyPodOS) && !podOSInUse(oldPodSpec) {
podSpec.OS = nil
}
dropDisabledTopologySpreadConstraintsFields(podSpec, oldPodSpec)
}
// dropDisabledTopologySpreadConstraintsFields removes disabled fields from the PodSpec related
// to TopologySpreadConstraints, unless they are already in use by the old spec.
func dropDisabledTopologySpreadConstraintsFields(podSpec, oldPodSpec *api.PodSpec) {
if !utilfeature.DefaultFeatureGate.Enabled(features.MinDomainsInPodTopologySpread) &&
!minDomainsInUse(oldPodSpec) &&
podSpec != nil {
for i := range podSpec.TopologySpreadConstraints {
podSpec.TopologySpreadConstraints[i].MinDomains = nil
}
}
}
// minDomainsInUse returns true if the pod spec is non-nil and at least one of
// its TopologySpreadConstraints has a non-nil MinDomains field.
func minDomainsInUse(podSpec *api.PodSpec) bool {
if podSpec == nil {
return false
}
for _, c := range podSpec.TopologySpreadConstraints {
if c.MinDomains != nil {
return true
}
}
return false
}
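A quick illustration of the drop semantics (hypothetical snippet, not part of this commit): with the MinDomainsInPodTopologySpread gate disabled and no prior use of the field in the old spec, an incoming MinDomains is silently cleared rather than rejected.

newSpec := &api.PodSpec{TopologySpreadConstraints: []api.TopologySpreadConstraint{
	{MinDomains: pointer.Int32(2)},
}}
dropDisabledTopologySpreadConstraintsFields(newSpec, nil /* no old spec */)
// newSpec.TopologySpreadConstraints[0].MinDomains is now nil.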
// podOSInUse returns true if the pod spec is non-nil and has OS field set

View File

@ -33,6 +33,7 @@ import (
featuregatetesting "k8s.io/component-base/featuregate/testing"
api "k8s.io/kubernetes/pkg/apis/core"
"k8s.io/kubernetes/pkg/features"
"k8s.io/utils/pointer"
)
func TestVisitContainers(t *testing.T) {
@ -1519,6 +1520,173 @@ func TestHaveSameExpandedDNSConfig(t *testing.T) {
}
}
func TestDropDisabledTopologySpreadConstraintsFields(t *testing.T) {
testCases := []struct {
name string
enabled bool
podSpec *api.PodSpec
oldPodSpec *api.PodSpec
wantPodSpec *api.PodSpec
}{
{
name: "TopologySpreadConstraints is nil",
podSpec: &api.PodSpec{},
oldPodSpec: &api.PodSpec{},
wantPodSpec: &api.PodSpec{},
},
{
name: "TopologySpreadConstraints is empty",
podSpec: &api.PodSpec{TopologySpreadConstraints: []api.TopologySpreadConstraint{}},
oldPodSpec: &api.PodSpec{TopologySpreadConstraints: []api.TopologySpreadConstraint{}},
wantPodSpec: &api.PodSpec{TopologySpreadConstraints: []api.TopologySpreadConstraint{}},
},
{
name: "TopologySpreadConstraints is not empty, but all constraints don't have minDomains",
podSpec: &api.PodSpec{TopologySpreadConstraints: []api.TopologySpreadConstraint{
{
MinDomains: nil,
},
{
MinDomains: nil,
},
}},
oldPodSpec: &api.PodSpec{TopologySpreadConstraints: []api.TopologySpreadConstraint{
{
MinDomains: nil,
},
{
MinDomains: nil,
},
}},
wantPodSpec: &api.PodSpec{TopologySpreadConstraints: []api.TopologySpreadConstraint{
{
MinDomains: nil,
},
{
MinDomains: nil,
},
}},
},
{
name: "one constraint in podSpec has non-empty minDomains, feature gate is disabled " +
"and all constraint in oldPodSpec doesn't have minDomains",
enabled: false,
podSpec: &api.PodSpec{
TopologySpreadConstraints: []api.TopologySpreadConstraint{
{
MinDomains: pointer.Int32(2),
},
{
MinDomains: nil,
},
},
},
oldPodSpec: &api.PodSpec{
TopologySpreadConstraints: []api.TopologySpreadConstraint{
{
MinDomains: nil,
},
{
MinDomains: nil,
},
},
},
wantPodSpec: &api.PodSpec{
TopologySpreadConstraints: []api.TopologySpreadConstraint{
{
// cleared.
MinDomains: nil,
},
{
MinDomains: nil,
},
},
},
},
{
name: "one constraint in podSpec has non-empty minDomains, feature gate is disabled " +
"and one constraint in oldPodSpec has minDomains",
enabled: false,
podSpec: &api.PodSpec{
TopologySpreadConstraints: []api.TopologySpreadConstraint{
{
MinDomains: pointer.Int32(2),
},
{
MinDomains: nil,
},
},
},
oldPodSpec: &api.PodSpec{
TopologySpreadConstraints: []api.TopologySpreadConstraint{
{
MinDomains: pointer.Int32(2),
},
{
MinDomains: nil,
},
},
},
wantPodSpec: &api.PodSpec{
TopologySpreadConstraints: []api.TopologySpreadConstraint{
{
// not cleared.
MinDomains: pointer.Int32(2),
},
{
MinDomains: nil,
},
},
},
},
{
name: "one constraint in podSpec has non-empty minDomains, feature gate is enabled" +
"and all constraint in oldPodSpec doesn't have minDomains",
enabled: true,
podSpec: &api.PodSpec{
TopologySpreadConstraints: []api.TopologySpreadConstraint{
{
MinDomains: pointer.Int32(2),
},
{
MinDomains: nil,
},
},
},
oldPodSpec: &api.PodSpec{
TopologySpreadConstraints: []api.TopologySpreadConstraint{
{
MinDomains: nil,
},
{
MinDomains: nil,
},
},
},
wantPodSpec: &api.PodSpec{
TopologySpreadConstraints: []api.TopologySpreadConstraint{
{
// not cleared.
MinDomains: pointer.Int32(2),
},
{
MinDomains: nil,
},
},
},
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.MinDomainsInPodTopologySpread, tc.enabled)()
dropDisabledFields(tc.podSpec, nil, tc.oldPodSpec, nil)
if diff := cmp.Diff(tc.wantPodSpec, tc.podSpec); diff != "" {
t.Errorf("unexpected pod spec (-want, +got):\n%s", diff)
}
})
}
}
func TestDropOSField(t *testing.T) {
podWithOSField := func() *api.Pod {
osField := api.PodOS{Name: "linux"}

View File

@ -5605,15 +5605,18 @@ type TopologySpreadConstraint struct {
// MaxSkew describes the degree to which pods may be unevenly distributed.
// When `whenUnsatisfiable=DoNotSchedule`, it is the maximum permitted difference
// between the number of matching pods in the target topology and the global minimum.
// The global minimum is the minimum number of matching pods in an eligible domain,
// or zero if the number of eligible domains is less than MinDomains.
// For example, in a 3-zone cluster, MaxSkew is set to 1, and pods with the same
// labelSelector spread as 1/1/0:
// labelSelector spread as 2/2/1:
// In this case, the global minimum is 1.
// +-------+-------+-------+
// | zone1 | zone2 | zone3 |
// +-------+-------+-------+
// | P | P | |
// | P P | P P | P |
// +-------+-------+-------+
// - if MaxSkew is 1, incoming pod can only be scheduled to zone3 to become 1/1/1;
// scheduling it onto zone1(zone2) would make the ActualSkew(2-0) on zone1(zone2)
// - if MaxSkew is 1, the incoming pod can only be scheduled to zone3 to become 2/2/2;
// scheduling it onto zone1(zone2) would make the ActualSkew(3-1) on zone1(zone2)
// violate MaxSkew(1).
// - if MaxSkew is 2, the incoming pod can be scheduled onto any zone.
// When `whenUnsatisfiable=ScheduleAnyway`, it is used to give higher precedence
@ -5624,6 +5627,10 @@ type TopologySpreadConstraint struct {
// and identical values are considered to be in the same topology.
// We consider each <key, value> as a "bucket", and try to put balanced number
// of pods into each bucket.
// We define a domain as a particular instance of a topology.
// Also, we define an eligible domain as a domain whose nodes match the node selector.
// For example, if TopologyKey is "kubernetes.io/hostname", each Node is a domain of that topology;
// if TopologyKey is "topology.kubernetes.io/zone", each zone is a domain of that topology.
// It's a required field.
TopologyKey string
// WhenUnsatisfiable indicates how to deal with a pod if it doesn't satisfy
@ -5653,6 +5660,32 @@ type TopologySpreadConstraint struct {
// in their corresponding topology domain.
// +optional
LabelSelector *metav1.LabelSelector
// MinDomains indicates a minimum number of eligible domains.
// When the number of eligible domains with matching topology keys is less than minDomains,
// Pod Topology Spread treats "global minimum" as 0, and then the calculation of Skew is performed.
// When the number of eligible domains with matching topology keys is equal to or greater than minDomains,
// this value has no effect on scheduling.
// As a result, when the number of eligible domains is less than minDomains,
// the scheduler won't schedule more than maxSkew Pods to those domains.
// If value is nil, the constraint behaves as if MinDomains is equal to 1.
// Valid values are integers greater than 0.
// When value is not nil, WhenUnsatisfiable must be DoNotSchedule.
//
// For example, in a 3-zone cluster, MaxSkew is set to 2, MinDomains is set to 5, and pods with the same
// labelSelector spread as 2/2/2:
// +-------+-------+-------+
// | zone1 | zone2 | zone3 |
// +-------+-------+-------+
// | P P | P P | P P |
// +-------+-------+-------+
// The number of domains is less than 5 (MinDomains), so the "global minimum" is treated as 0.
// In this situation, a new pod with the same labelSelector cannot be scheduled,
// because the computed skew will be 3 (3 - 0) if the new Pod is scheduled to any of the three zones;
// this would violate MaxSkew.
//
// This is an alpha field and requires enabling the MinDomainsInPodTopologySpread feature gate.
// +optional
MinDomains *int32
}
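For orientation, this is how the new field looks from the client side, using the mirrored field in k8s.io/api/core/v1 whose conversion is wired up later in this commit. A minimal sketch; the selector and values are illustrative:

import (
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/utils/pointer"
)

func examplePodSpec() v1.PodSpec {
	return v1.PodSpec{
		TopologySpreadConstraints: []v1.TopologySpreadConstraint{{
			MaxSkew:           1,
			TopologyKey:       "topology.kubernetes.io/zone",
			WhenUnsatisfiable: v1.DoNotSchedule, // required whenever MinDomains is set
			LabelSelector: &metav1.LabelSelector{
				MatchLabels: map[string]string{"app": "web"},
			},
			MinDomains: pointer.Int32(3), // treat fewer than 3 zones as under-spread
		}},
	}
}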
// These are the built-in errors for PortStatus.

View File

@ -22,11 +22,10 @@ import (
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/kubernetes/pkg/util/parsers"
utilpointer "k8s.io/utils/pointer"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/util/parsers"
"k8s.io/utils/pointer"
)
func addDefaultingFuncs(scheme *runtime.Scheme) error {
@ -64,7 +63,7 @@ func SetDefaults_ReplicationController(obj *v1.ReplicationController) {
}
}
func SetDefaults_Volume(obj *v1.Volume) {
if utilpointer.AllPtrFieldsNil(&obj.VolumeSource) {
if pointer.AllPtrFieldsNil(&obj.VolumeSource) {
obj.VolumeSource = v1.VolumeSource{
EmptyDir: &v1.EmptyDirVolumeSource{},
}
@ -143,7 +142,7 @@ func SetDefaults_Service(obj *v1.Service) {
if obj.Spec.Type == v1.ServiceTypeLoadBalancer {
if obj.Spec.AllocateLoadBalancerNodePorts == nil {
obj.Spec.AllocateLoadBalancerNodePorts = utilpointer.BoolPtr(true)
obj.Spec.AllocateLoadBalancerNodePorts = pointer.BoolPtr(true)
}
}
}

View File

@ -7994,6 +7994,7 @@ func autoConvert_v1_TopologySpreadConstraint_To_core_TopologySpreadConstraint(in
out.TopologyKey = in.TopologyKey
out.WhenUnsatisfiable = core.UnsatisfiableConstraintAction(in.WhenUnsatisfiable)
out.LabelSelector = (*metav1.LabelSelector)(unsafe.Pointer(in.LabelSelector))
out.MinDomains = (*int32)(unsafe.Pointer(in.MinDomains))
return nil
}
@ -8007,6 +8008,7 @@ func autoConvert_core_TopologySpreadConstraint_To_v1_TopologySpreadConstraint(in
out.TopologyKey = in.TopologyKey
out.WhenUnsatisfiable = v1.UnsatisfiableConstraintAction(in.WhenUnsatisfiable)
out.LabelSelector = (*metav1.LabelSelector)(unsafe.Pointer(in.LabelSelector))
out.MinDomains = (*int32)(unsafe.Pointer(in.MinDomains))
return nil
}

View File

@ -6528,6 +6528,7 @@ func validateTopologySpreadConstraints(constraints []core.TopologySpreadConstrai
if err := ValidateSpreadConstraintNotRepeat(subFldPath.Child("{topologyKey, whenUnsatisfiable}"), constraint, constraints[i+1:]); err != nil {
allErrs = append(allErrs, err)
}
allErrs = append(allErrs, validateMinDomains(subFldPath.Child("minDomains"), constraint.MinDomains, constraint.WhenUnsatisfiable)...)
}
return allErrs
@ -6541,6 +6542,22 @@ func ValidateMaxSkew(fldPath *field.Path, maxSkew int32) *field.Error {
return nil
}
// validateMinDomains tests that the argument is a valid MinDomains.
func validateMinDomains(fldPath *field.Path, minDomains *int32, action core.UnsatisfiableConstraintAction) field.ErrorList {
if minDomains == nil {
return nil
}
var allErrs field.ErrorList
if *minDomains <= 0 {
allErrs = append(allErrs, field.Invalid(fldPath, minDomains, isNotPositiveErrorMsg))
}
// When MinDomains is non-nil, whenUnsatisfiable must be DoNotSchedule.
if action != core.DoNotSchedule {
allErrs = append(allErrs, field.Invalid(fldPath, minDomains, fmt.Sprintf("can only use minDomains if whenUnsatisfiable=%s, not %s", string(core.DoNotSchedule), string(action))))
}
return allErrs
}
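Given the checks above, a negative MinDomains combined with ScheduleAnyway accumulates both errors, as the updated tests below exercise. A hypothetical package-internal call:

errs := validateMinDomains(field.NewPath("minDomains"), pointer.Int32(-1), core.ScheduleAnyway)
// len(errs) == 2: one Invalid error for the non-positive value, and one
// because whenUnsatisfiable is ScheduleAnyway rather than DoNotSchedule.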
// ValidateTopologyKey tests that the argument is a valid TopologyKey.
func ValidateTopologyKey(fldPath *field.Path, topologyKey string) *field.Error {
if len(topologyKey) == 0 {

View File

@ -43,6 +43,7 @@ import (
"k8s.io/kubernetes/pkg/apis/core"
"k8s.io/kubernetes/pkg/capabilities"
"k8s.io/kubernetes/pkg/features"
"k8s.io/utils/pointer"
utilpointer "k8s.io/utils/pointer"
)
@ -18804,57 +18805,115 @@ func TestAnyDataSource(t *testing.T) {
}
func TestValidateTopologySpreadConstraints(t *testing.T) {
fieldPath := field.NewPath("field")
subFldPath0 := fieldPath.Index(0)
fieldPathMinDomains := subFldPath0.Child("minDomains")
fieldPathMaxSkew := subFldPath0.Child("maxSkew")
fieldPathTopologyKey := subFldPath0.Child("topologyKey")
fieldPathWhenUnsatisfiable := subFldPath0.Child("whenUnsatisfiable")
fieldPathTopologyKeyAndWhenUnsatisfiable := subFldPath0.Child("{topologyKey, whenUnsatisfiable}")
testCases := []struct {
name string
constraints []core.TopologySpreadConstraint
errtype field.ErrorType
errfield string
wantFieldErrors field.ErrorList
}{
{
name: "all required fields ok",
constraints: []core.TopologySpreadConstraint{
{MaxSkew: 1, TopologyKey: "k8s.io/zone", WhenUnsatisfiable: core.DoNotSchedule},
{
MaxSkew: 1,
TopologyKey: "k8s.io/zone",
WhenUnsatisfiable: core.DoNotSchedule,
MinDomains: pointer.Int32(3),
},
},
wantFieldErrors: field.ErrorList{},
},
{
name: "missing MaxSkew",
constraints: []core.TopologySpreadConstraint{
{TopologyKey: "k8s.io/zone", WhenUnsatisfiable: core.DoNotSchedule},
},
errtype: field.ErrorTypeInvalid,
errfield: "maxSkew",
wantFieldErrors: []*field.Error{field.Invalid(fieldPathMaxSkew, int32(0), isNotPositiveErrorMsg)},
},
{
name: "invalid MaxSkew",
name: "negative MaxSkew",
constraints: []core.TopologySpreadConstraint{
{MaxSkew: 0, TopologyKey: "k8s.io/zone", WhenUnsatisfiable: core.DoNotSchedule},
{MaxSkew: -1, TopologyKey: "k8s.io/zone", WhenUnsatisfiable: core.DoNotSchedule},
},
wantFieldErrors: []*field.Error{field.Invalid(fieldPathMaxSkew, int32(-1), isNotPositiveErrorMsg)},
},
{
name: "can use MinDomains with ScheduleAnyway, when MinDomains = nil",
constraints: []core.TopologySpreadConstraint{
{
MaxSkew: 1,
TopologyKey: "k8s.io/zone",
WhenUnsatisfiable: core.ScheduleAnyway,
MinDomains: nil,
},
},
wantFieldErrors: field.ErrorList{},
},
{
name: "negative minDomains is invalid",
constraints: []core.TopologySpreadConstraint{
{
MaxSkew: 1,
TopologyKey: "k8s.io/zone",
WhenUnsatisfiable: core.DoNotSchedule,
MinDomains: pointer.Int32(-1),
},
},
wantFieldErrors: []*field.Error{field.Invalid(fieldPathMinDomains, pointer.Int32(-1), isNotPositiveErrorMsg)},
},
{
name: "cannot use non-nil MinDomains with ScheduleAnyway",
constraints: []core.TopologySpreadConstraint{
{
MaxSkew: 1,
TopologyKey: "k8s.io/zone",
WhenUnsatisfiable: core.ScheduleAnyway,
MinDomains: pointer.Int32(10),
},
},
wantFieldErrors: []*field.Error{field.Invalid(fieldPathMinDomains, pointer.Int32(10), fmt.Sprintf("can only use minDomains if whenUnsatisfiable=%s, not %s", string(core.DoNotSchedule), string(core.ScheduleAnyway)))},
},
{
name: "use negative MinDomains with ScheduleAnyway(invalid)",
constraints: []core.TopologySpreadConstraint{
{
MaxSkew: 1,
TopologyKey: "k8s.io/zone",
WhenUnsatisfiable: core.ScheduleAnyway,
MinDomains: pointer.Int32(-1),
},
},
wantFieldErrors: []*field.Error{
field.Invalid(fieldPathMinDomains, pointer.Int32(-1), isNotPositiveErrorMsg),
field.Invalid(fieldPathMinDomains, pointer.Int32(-1), fmt.Sprintf("can only use minDomains if whenUnsatisfiable=%s, not %s", string(core.DoNotSchedule), string(core.ScheduleAnyway))),
},
errtype: field.ErrorTypeInvalid,
errfield: "maxSkew",
},
{
name: "missing TopologyKey",
constraints: []core.TopologySpreadConstraint{
{MaxSkew: 1, WhenUnsatisfiable: core.DoNotSchedule},
},
errtype: field.ErrorTypeRequired,
errfield: "topologyKey",
wantFieldErrors: []*field.Error{field.Required(fieldPathTopologyKey, "can not be empty")},
},
{
name: "missing scheduling mode",
constraints: []core.TopologySpreadConstraint{
{MaxSkew: 1, TopologyKey: "k8s.io/zone"},
},
errtype: field.ErrorTypeNotSupported,
errfield: "whenUnsatisfiable",
wantFieldErrors: []*field.Error{field.NotSupported(fieldPathWhenUnsatisfiable, core.UnsatisfiableConstraintAction(""), supportedScheduleActions.List())},
},
{
name: "unsupported scheduling mode",
constraints: []core.TopologySpreadConstraint{
{MaxSkew: 1, TopologyKey: "k8s.io/zone", WhenUnsatisfiable: core.UnsatisfiableConstraintAction("N/A")},
},
errtype: field.ErrorTypeNotSupported,
errfield: "whenUnsatisfiable",
wantFieldErrors: []*field.Error{field.NotSupported(fieldPathWhenUnsatisfiable, core.UnsatisfiableConstraintAction("N/A"), supportedScheduleActions.List())},
},
{
name: "multiple constraints ok with all required fields",
@ -18862,15 +18921,15 @@ func TestValidateTopologySpreadConstraints(t *testing.T) {
{MaxSkew: 1, TopologyKey: "k8s.io/zone", WhenUnsatisfiable: core.DoNotSchedule},
{MaxSkew: 2, TopologyKey: "k8s.io/node", WhenUnsatisfiable: core.ScheduleAnyway},
},
wantFieldErrors: field.ErrorList{},
},
{
name: "multiple constraints missing TopologyKey on partial ones",
constraints: []core.TopologySpreadConstraint{
{MaxSkew: 1, TopologyKey: "k8s.io/zone", WhenUnsatisfiable: core.DoNotSchedule},
{MaxSkew: 2, WhenUnsatisfiable: core.ScheduleAnyway},
{MaxSkew: 1, WhenUnsatisfiable: core.ScheduleAnyway},
{MaxSkew: 2, TopologyKey: "k8s.io/zone", WhenUnsatisfiable: core.DoNotSchedule},
},
errtype: field.ErrorTypeRequired,
errfield: "topologyKey",
wantFieldErrors: []*field.Error{field.Required(fieldPathTopologyKey, "can not be empty")},
},
{
name: "duplicate constraints",
@ -18878,25 +18937,19 @@ func TestValidateTopologySpreadConstraints(t *testing.T) {
{MaxSkew: 1, TopologyKey: "k8s.io/zone", WhenUnsatisfiable: core.DoNotSchedule},
{MaxSkew: 2, TopologyKey: "k8s.io/zone", WhenUnsatisfiable: core.DoNotSchedule},
},
errtype: field.ErrorTypeDuplicate,
errfield: "{topologyKey, whenUnsatisfiable}",
wantFieldErrors: []*field.Error{
field.Duplicate(fieldPathTopologyKeyAndWhenUnsatisfiable, fmt.Sprintf("{%v, %v}", "k8s.io/zone", core.DoNotSchedule)),
},
},
}
for i, tc := range testCases {
errs := validateTopologySpreadConstraints(tc.constraints, field.NewPath("field"))
if len(errs) > 0 && tc.errtype == "" {
t.Errorf("[%d: %q] unexpected error(s): %v", i, tc.name, errs)
} else if len(errs) == 0 && tc.errtype != "" {
t.Errorf("[%d: %q] expected error type %v", i, tc.name, tc.errtype)
} else if len(errs) >= 1 {
if errs[0].Type != tc.errtype {
t.Errorf("[%d: %q] expected error type %v, got %v", i, tc.name, tc.errtype, errs[0].Type)
} else if !strings.HasSuffix(errs[0].Field, "."+tc.errfield) {
t.Errorf("[%d: %q] expected error on field %q, got %q", i, tc.name, tc.errfield, errs[0].Field)
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
errs := validateTopologySpreadConstraints(tc.constraints, fieldPath)
if diff := cmp.Diff(tc.wantFieldErrors, errs); diff != "" {
t.Errorf("unexpected field errors (-want, +got):\n%s", diff)
}
})
}
}

View File

@ -5629,6 +5629,11 @@ func (in *TopologySpreadConstraint) DeepCopyInto(out *TopologySpreadConstraint)
*out = new(v1.LabelSelector)
(*in).DeepCopyInto(*out)
}
if in.MinDomains != nil {
in, out := &in.MinDomains, &out.MinDomains
*out = new(int32)
**out = **in
}
return
}

View File

@ -826,6 +826,13 @@ const (
//
// Stop auto-generation of secret-based service account tokens.
LegacyServiceAccountTokenNoAutoGeneration featuregate.Feature = "LegacyServiceAccountTokenNoAutoGeneration"
// owner: @sanposhiho
// kep: http://kep.k8s.io/3022
// alpha: v1.24
//
// Enable MinDomains in Pod Topology Spread.
MinDomainsInPodTopologySpread featuregate.Feature = "MinDomainsInPodTopologySpread"
)
func init() {
@ -947,6 +954,7 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS
RecoverVolumeExpansionFailure: {Default: false, PreRelease: featuregate.Alpha},
GRPCContainerProbe: {Default: false, PreRelease: featuregate.Alpha},
LegacyServiceAccountTokenNoAutoGeneration: {Default: true, PreRelease: featuregate.Beta},
MinDomainsInPodTopologySpread: {Default: false, PreRelease: featuregate.Alpha},
// inherited features from generic apiserver, relisted here to get a conflict if it is changed
// unintentionally on either side:
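Call sites consult this gate through the standard feature-gate API. A minimal sketch mirroring the check in dropDisabledTopologySpreadConstraintsFields earlier in this commit (utilfeature is k8s.io/apiserver/pkg/util/feature, as imported elsewhere in the commit):

if utilfeature.DefaultFeatureGate.Enabled(features.MinDomainsInPodTopologySpread) {
	// The alpha field may be honored; otherwise it is stripped from incoming specs.
}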

View File

@ -25362,7 +25362,7 @@ func schema_k8sio_api_core_v1_TopologySpreadConstraint(ref common.ReferenceCallb
Properties: map[string]spec.Schema{
"maxSkew": {
SchemaProps: spec.SchemaProps{
Description: "MaxSkew describes the degree to which pods may be unevenly distributed. When `whenUnsatisfiable=DoNotSchedule`, it is the maximum permitted difference between the number of matching pods in the target topology and the global minimum. For example, in a 3-zone cluster, MaxSkew is set to 1, and pods with the same labelSelector spread as 1/1/0: | zone1 | zone2 | zone3 | | P | P | | - if MaxSkew is 1, incoming pod can only be scheduled to zone3 to become 1/1/1; scheduling it onto zone1(zone2) would make the ActualSkew(2-0) on zone1(zone2) violate MaxSkew(1). - if MaxSkew is 2, incoming pod can be scheduled onto any zone. When `whenUnsatisfiable=ScheduleAnyway`, it is used to give higher precedence to topologies that satisfy it. It's a required field. Default value is 1 and 0 is not allowed.",
Description: "MaxSkew describes the degree to which pods may be unevenly distributed. When `whenUnsatisfiable=DoNotSchedule`, it is the maximum permitted difference between the number of matching pods in the target topology and the global minimum. The global minimum is the minimum number of matching pods in an eligible domain or zero if the number of eligible domains is less than MinDomains. For example, in a 3-zone cluster, MaxSkew is set to 1, and pods with the same labelSelector spread as 2/2/1: In this case, the global minimum is 1. | zone1 | zone2 | zone3 | | P P | P P | P | - if MaxSkew is 1, incoming pod can only be scheduled to zone3 to become 2/2/2; scheduling it onto zone1(zone2) would make the ActualSkew(3-1) on zone1(zone2) violate MaxSkew(1). - if MaxSkew is 2, incoming pod can be scheduled onto any zone. When `whenUnsatisfiable=ScheduleAnyway`, it is used to give higher precedence to topologies that satisfy it. It's a required field. Default value is 1 and 0 is not allowed.",
Default: 0,
Type: []string{"integer"},
Format: "int32",
@ -25370,7 +25370,7 @@ func schema_k8sio_api_core_v1_TopologySpreadConstraint(ref common.ReferenceCallb
},
"topologyKey": {
SchemaProps: spec.SchemaProps{
Description: "TopologyKey is the key of node labels. Nodes that have a label with this key and identical values are considered to be in the same topology. We consider each <key, value> as a \"bucket\", and try to put balanced number of pods into each bucket. It's a required field.",
Description: "TopologyKey is the key of node labels. Nodes that have a label with this key and identical values are considered to be in the same topology. We consider each <key, value> as a \"bucket\", and try to put balanced number of pods into each bucket. We define a domain as a particular instance of a topology. Also, we define an eligible domain as a domain whose nodes match the node selector. e.g. If TopologyKey is \"kubernetes.io/hostname\", each Node is a domain of that topology. And, if TopologyKey is \"topology.kubernetes.io/zone\", each zone is a domain of that topology. It's a required field.",
Default: "",
Type: []string{"string"},
Format: "",
@ -25390,6 +25390,13 @@ func schema_k8sio_api_core_v1_TopologySpreadConstraint(ref common.ReferenceCallb
Ref: ref("k8s.io/apimachinery/pkg/apis/meta/v1.LabelSelector"),
},
},
"minDomains": {
SchemaProps: spec.SchemaProps{
Description: "MinDomains indicates a minimum number of eligible domains. When the number of eligible domains with matching topology keys is less than minDomains, Pod Topology Spread treats \"global minimum\" as 0, and then the calculation of Skew is performed. And when the number of eligible domains with matching topology keys equals or greater than minDomains, this value has no effect on scheduling. As a result, when the number of eligible domains is less than minDomains, scheduler won't schedule more than maxSkew Pods to those domains. If value is nil, the constraint behaves as if MinDomains is equal to 1. Valid values are integers greater than 0. When value is not nil, WhenUnsatisfiable must be DoNotSchedule.\n\nFor example, in a 3-zone cluster, MaxSkew is set to 2, MinDomains is set to 5 and pods with the same labelSelector spread as 2/2/2: | zone1 | zone2 | zone3 | | P P | P P | P P | The number of domains is less than 5(MinDomains), so \"global minimum\" is treated as 0. In this situation, new pod with the same labelSelector cannot be scheduled, because computed skew will be 3(3 - 0) if new Pod is scheduled to any of the three zones, it will violate MaxSkew.\n\nThis is an alpha field and requires enabling MinDomainsInPodTopologySpread feature gate.",
Type: []string{"integer"},
Format: "int32",
},
},
},
Required: []string{"maxSkew", "topologyKey", "whenUnsatisfiable"},
},

View File

@ -30,7 +30,6 @@ import (
v1 "k8s.io/api/core/v1"
policy "k8s.io/api/policy/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/client-go/informers"
@ -96,6 +95,7 @@ func getDefaultDefaultPreemptionArgs() *config.DefaultPreemptionArgs {
}
var nodeResourcesFitFunc = frameworkruntime.FactoryAdapter(feature.Features{}, noderesources.NewFit)
var podTopologySpreadFunc = frameworkruntime.FactoryAdapter(feature.Features{}, podtopologyspread.New)
// TestPlugin returns an Error status when trying to `AddPod` or `RemovePod` on nodes that have the {k,v} label pair defined.
type TestPlugin struct {
@ -636,7 +636,7 @@ func TestDryRunPreemption(t *testing.T) {
{
name: "preemption to resolve pod topology spread filter failure",
registerPlugins: []st.RegisterPluginFunc{
st.RegisterPluginAsExtensions(podtopologyspread.Name, podtopologyspread.New, "PreFilter", "Filter"),
st.RegisterPluginAsExtensions(podtopologyspread.Name, podTopologySpreadFunc, "PreFilter", "Filter"),
},
nodeNames: []string{"node-a/zone1", "node-b/zone1", "node-x/zone2"},
testPods: []*v1.Pod{
@ -1495,7 +1495,7 @@ func TestPreempt(t *testing.T) {
st.MakePod().Name("p-x2").UID("p-x2").Namespace(v1.NamespaceDefault).Node("node-x").Label("foo", "").Priority(highPriority).Obj(),
},
nodeNames: []string{"node-a/zone1", "node-b/zone1", "node-x/zone2"},
registerPlugin: st.RegisterPluginAsExtensions(podtopologyspread.Name, podtopologyspread.New, "PreFilter", "Filter"),
registerPlugin: st.RegisterPluginAsExtensions(podtopologyspread.Name, podTopologySpreadFunc, "PreFilter", "Filter"),
want: framework.NewPostFilterResultWithNominatedNode("node-b"),
expectedPods: []string{"p-b1"},
},

View File

@ -26,4 +26,5 @@ type Features struct {
EnableReadWriteOncePod bool
EnableVolumeCapacityPriority bool
EnableCSIStorageCapacity bool
EnableMinDomainsInPodTopologySpread bool
}

View File

@ -36,13 +36,14 @@ type topologySpreadConstraint struct {
MaxSkew int32
TopologyKey string
Selector labels.Selector
MinDomains int32
}
// buildDefaultConstraints builds the constraints for a pod using
// .DefaultConstraints and the selectors from the services, replication
// controllers, replica sets and stateful sets that match the pod.
func (pl *PodTopologySpread) buildDefaultConstraints(p *v1.Pod, action v1.UnsatisfiableConstraintAction) ([]topologySpreadConstraint, error) {
constraints, err := filterTopologySpreadConstraints(pl.defaultConstraints, action)
constraints, err := filterTopologySpreadConstraints(pl.defaultConstraints, action, pl.enableMinDomainsInPodTopologySpread)
if err != nil || len(constraints) == 0 {
return nil, err
}
@ -66,7 +67,7 @@ func nodeLabelsMatchSpreadConstraints(nodeLabels map[string]string, constraints
return true
}
func filterTopologySpreadConstraints(constraints []v1.TopologySpreadConstraint, action v1.UnsatisfiableConstraintAction) ([]topologySpreadConstraint, error) {
func filterTopologySpreadConstraints(constraints []v1.TopologySpreadConstraint, action v1.UnsatisfiableConstraintAction, enableMinDomainsInPodTopologySpread bool) ([]topologySpreadConstraint, error) {
var result []topologySpreadConstraint
for _, c := range constraints {
if c.WhenUnsatisfiable == action {
@ -74,11 +75,16 @@ func filterTopologySpreadConstraints(constraints []v1.TopologySpreadConstraint,
if err != nil {
return nil, err
}
result = append(result, topologySpreadConstraint{
tsc := topologySpreadConstraint{
MaxSkew: c.MaxSkew,
TopologyKey: c.TopologyKey,
Selector: selector,
})
MinDomains: 1, // if MinDomains is nil, we treat MinDomains as 1.
}
if enableMinDomainsInPodTopologySpread && c.MinDomains != nil {
tsc.MinDomains = *c.MinDomains
}
result = append(result, tsc)
}
}
return result, nil
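The net effect of the defaulting above is that scheduler-internal constraints always carry a concrete MinDomains, so downstream code never needs a nil check. A package-internal sketch (values illustrative):

c := v1.TopologySpreadConstraint{
	MaxSkew:           1,
	TopologyKey:       "zone",
	WhenUnsatisfiable: v1.DoNotSchedule,
	MinDomains:        pointer.Int32(4),
}
got, _ := filterTopologySpreadConstraints([]v1.TopologySpreadConstraint{c}, v1.DoNotSchedule, true)
// got[0].MinDomains == 4 with the gate enabled; with the last argument false
// (gate disabled), or with c.MinDomains == nil, it would default to 1.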

View File

@ -201,7 +201,7 @@ func (pl *PodTopologySpread) calPreFilterState(ctx context.Context, pod *v1.Pod)
if len(pod.Spec.TopologySpreadConstraints) > 0 {
// The API server has feature gating to strip the field from the spec,
// so we don't need to re-check the feature gate here; just check the length of Constraints.
constraints, err = filterTopologySpreadConstraints(pod.Spec.TopologySpreadConstraints, v1.DoNotSchedule)
constraints, err = filterTopologySpreadConstraints(pod.Spec.TopologySpreadConstraints, v1.DoNotSchedule, pl.enableMinDomainsInPodTopologySpread)
if err != nil {
return nil, fmt.Errorf("obtaining pod's hard topology spread constraints: %w", err)
}

View File

@ -29,7 +29,9 @@ import (
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/kubernetes/pkg/scheduler/apis/config"
"k8s.io/kubernetes/pkg/scheduler/framework"
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature"
plugintesting "k8s.io/kubernetes/pkg/scheduler/framework/plugins/testing"
frameworkruntime "k8s.io/kubernetes/pkg/scheduler/framework/runtime"
"k8s.io/kubernetes/pkg/scheduler/internal/cache"
st "k8s.io/kubernetes/pkg/scheduler/testing"
)
@ -45,6 +47,8 @@ var cmpOpts = []cmp.Option{
}),
}
var topologySpreadFunc = frameworkruntime.FactoryAdapter(feature.Features{}, New)
func (p *criticalPaths) sort() {
if p[0].MatchNum == p[1].MatchNum && p[0].TopologyValue > p[1].TopologyValue {
// Swap TopologyValue to make them sorted alphabetically.
@ -81,6 +85,7 @@ func TestPreFilterState(t *testing.T) {
MaxSkew: 5,
TopologyKey: "zone",
Selector: mustConvertLabelSelectorAsSelector(t, st.MakeLabelSelector().Label("foo", "bar").Obj()),
MinDomains: 1,
},
},
TpKeyToCriticalPaths: map[string]*criticalPaths{
@ -116,6 +121,7 @@ func TestPreFilterState(t *testing.T) {
MaxSkew: 1,
TopologyKey: "zone",
Selector: mustConvertLabelSelectorAsSelector(t, fooSelector),
MinDomains: 1,
},
},
TpKeyToCriticalPaths: map[string]*criticalPaths{
@ -153,6 +159,7 @@ func TestPreFilterState(t *testing.T) {
MaxSkew: 1,
TopologyKey: "zone",
Selector: mustConvertLabelSelectorAsSelector(t, fooSelector),
MinDomains: 1,
},
},
TpKeyToCriticalPaths: map[string]*criticalPaths{
@ -189,6 +196,7 @@ func TestPreFilterState(t *testing.T) {
MaxSkew: 1,
TopologyKey: "zone",
Selector: mustConvertLabelSelectorAsSelector(t, fooSelector),
MinDomains: 1,
},
},
TpKeyToCriticalPaths: map[string]*criticalPaths{
@ -227,11 +235,13 @@ func TestPreFilterState(t *testing.T) {
MaxSkew: 1,
TopologyKey: "zone",
Selector: mustConvertLabelSelectorAsSelector(t, fooSelector),
MinDomains: 1,
},
{
MaxSkew: 1,
TopologyKey: "node",
Selector: mustConvertLabelSelectorAsSelector(t, fooSelector),
MinDomains: 1,
},
},
TpKeyToCriticalPaths: map[string]*criticalPaths{
@ -276,11 +286,13 @@ func TestPreFilterState(t *testing.T) {
MaxSkew: 1,
TopologyKey: "zone",
Selector: mustConvertLabelSelectorAsSelector(t, fooSelector),
MinDomains: 1,
},
{
MaxSkew: 1,
TopologyKey: "node",
Selector: mustConvertLabelSelectorAsSelector(t, fooSelector),
MinDomains: 1,
},
},
TpKeyToCriticalPaths: map[string]*criticalPaths{
@ -317,11 +329,13 @@ func TestPreFilterState(t *testing.T) {
MaxSkew: 1,
TopologyKey: "zone",
Selector: mustConvertLabelSelectorAsSelector(t, fooSelector),
MinDomains: 1,
},
{
MaxSkew: 1,
TopologyKey: "node",
Selector: mustConvertLabelSelectorAsSelector(t, barSelector),
MinDomains: 1,
},
},
TpKeyToCriticalPaths: map[string]*criticalPaths{
@ -363,11 +377,13 @@ func TestPreFilterState(t *testing.T) {
MaxSkew: 1,
TopologyKey: "zone",
Selector: mustConvertLabelSelectorAsSelector(t, fooSelector),
MinDomains: 1,
},
{
MaxSkew: 1,
TopologyKey: "node",
Selector: mustConvertLabelSelectorAsSelector(t, barSelector),
MinDomains: 1,
},
},
TpKeyToCriticalPaths: map[string]*criticalPaths{
@ -411,11 +427,13 @@ func TestPreFilterState(t *testing.T) {
MaxSkew: 1,
TopologyKey: "zone",
Selector: mustConvertLabelSelectorAsSelector(t, fooSelector),
MinDomains: 1,
},
{
MaxSkew: 1,
TopologyKey: "node",
Selector: mustConvertLabelSelectorAsSelector(t, fooSelector),
MinDomains: 1,
},
},
TpKeyToCriticalPaths: map[string]*criticalPaths{
@ -448,11 +466,13 @@ func TestPreFilterState(t *testing.T) {
MaxSkew: 3,
TopologyKey: "node",
Selector: mustConvertLabelSelectorAsSelector(t, st.MakeLabelSelector().Label("foo", "bar").Obj()),
MinDomains: 1,
},
{
MaxSkew: 5,
TopologyKey: "rack",
Selector: mustConvertLabelSelectorAsSelector(t, st.MakeLabelSelector().Label("foo", "bar").Obj()),
MinDomains: 1,
},
},
TpKeyToCriticalPaths: map[string]*criticalPaths{
@ -490,6 +510,7 @@ func TestPreFilterState(t *testing.T) {
MaxSkew: 1,
TopologyKey: "zone",
Selector: mustConvertLabelSelectorAsSelector(t, st.MakeLabelSelector().Label("baz", "tar").Obj()),
MinDomains: 1,
},
},
TpKeyToCriticalPaths: map[string]*criticalPaths{
@ -518,7 +539,7 @@ func TestPreFilterState(t *testing.T) {
DefaultConstraints: tt.defaultConstraints,
DefaultingType: config.ListDefaulting,
}
p := plugintesting.SetupPluginWithInformers(ctx, t, New, args, cache.NewSnapshot(tt.existingPods, tt.nodes), tt.objs)
p := plugintesting.SetupPluginWithInformers(ctx, t, topologySpreadFunc, args, cache.NewSnapshot(tt.existingPods, tt.nodes), tt.objs)
cs := framework.NewCycleState()
if s := p.(*PodTopologySpread).PreFilter(ctx, cs, tt.pod); !s.IsSuccess() {
t.Fatal(s.AsError())
@ -539,6 +560,7 @@ func TestPreFilterStateAddPod(t *testing.T) {
MaxSkew: 1,
TopologyKey: "node",
Selector: mustConvertLabelSelectorAsSelector(t, st.MakeLabelSelector().Exists("foo").Obj()),
MinDomains: 1,
}
zoneConstraint := nodeConstraint
zoneConstraint.TopologyKey = "zone"
@ -763,6 +785,7 @@ func TestPreFilterStateAddPod(t *testing.T) {
MaxSkew: 1,
TopologyKey: "node",
Selector: mustConvertLabelSelectorAsSelector(t, st.MakeLabelSelector().Exists("bar").Obj()),
MinDomains: 1,
},
},
TpKeyToCriticalPaths: map[string]*criticalPaths{
@ -803,6 +826,7 @@ func TestPreFilterStateAddPod(t *testing.T) {
MaxSkew: 1,
TopologyKey: "node",
Selector: mustConvertLabelSelectorAsSelector(t, st.MakeLabelSelector().Exists("bar").Obj()),
MinDomains: 1,
},
},
TpKeyToCriticalPaths: map[string]*criticalPaths{
@ -823,7 +847,7 @@ func TestPreFilterStateAddPod(t *testing.T) {
t.Run(tt.name, func(t *testing.T) {
ctx := context.Background()
snapshot := cache.NewSnapshot(tt.existingPods, tt.nodes)
pl := plugintesting.SetupPlugin(t, New, &config.PodTopologySpreadArgs{DefaultingType: config.ListDefaulting}, snapshot)
pl := plugintesting.SetupPlugin(t, topologySpreadFunc, &config.PodTopologySpreadArgs{DefaultingType: config.ListDefaulting}, snapshot)
p := pl.(*PodTopologySpread)
cs := framework.NewCycleState()
if s := p.PreFilter(ctx, cs, tt.preemptor); !s.IsSuccess() {
@ -852,6 +876,7 @@ func TestPreFilterStateRemovePod(t *testing.T) {
MaxSkew: 1,
TopologyKey: "node",
Selector: mustConvertLabelSelectorAsSelector(t, st.MakeLabelSelector().Exists("foo").Obj()),
MinDomains: 1,
}
zoneConstraint := nodeConstraint
zoneConstraint.TopologyKey = "zone"
@ -1027,7 +1052,7 @@ func TestPreFilterStateRemovePod(t *testing.T) {
t.Run(tt.name, func(t *testing.T) {
ctx := context.Background()
snapshot := cache.NewSnapshot(tt.existingPods, tt.nodes)
pl := plugintesting.SetupPlugin(t, New, &config.PodTopologySpreadArgs{DefaultingType: config.ListDefaulting}, snapshot)
pl := plugintesting.SetupPlugin(t, topologySpreadFunc, &config.PodTopologySpreadArgs{DefaultingType: config.ListDefaulting}, snapshot)
p := pl.(*PodTopologySpread)
cs := framework.NewCycleState()
s := p.PreFilter(ctx, cs, tt.preemptor)
@ -1101,7 +1126,7 @@ func BenchmarkFilter(b *testing.B) {
b.Run(tt.name, func(b *testing.B) {
existingPods, allNodes, _ := st.MakeNodesAndPodsForEvenPodsSpread(tt.pod.Labels, tt.existingPodsNum, tt.allNodesNum, tt.filteredNodesNum)
ctx := context.Background()
pl := plugintesting.SetupPlugin(b, New, &config.PodTopologySpreadArgs{DefaultingType: config.ListDefaulting}, cache.NewSnapshot(existingPods, allNodes))
pl := plugintesting.SetupPlugin(b, topologySpreadFunc, &config.PodTopologySpreadArgs{DefaultingType: config.ListDefaulting}, cache.NewSnapshot(existingPods, allNodes))
p := pl.(*PodTopologySpread)
b.ResetTimer()
for i := 0; i < b.N; i++ {
@ -1429,7 +1454,7 @@ func TestSingleConstraint(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
snapshot := cache.NewSnapshot(tt.existingPods, tt.nodes)
pl := plugintesting.SetupPlugin(t, New, &config.PodTopologySpreadArgs{DefaultingType: config.ListDefaulting}, snapshot)
pl := plugintesting.SetupPlugin(t, topologySpreadFunc, &config.PodTopologySpreadArgs{DefaultingType: config.ListDefaulting}, snapshot)
p := pl.(*PodTopologySpread)
state := framework.NewCycleState()
preFilterStatus := p.PreFilter(context.Background(), state, tt.pod)
@ -1656,7 +1681,7 @@ func TestMultipleConstraints(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
snapshot := cache.NewSnapshot(tt.existingPods, tt.nodes)
pl := plugintesting.SetupPlugin(t, New, &config.PodTopologySpreadArgs{DefaultingType: config.ListDefaulting}, snapshot)
pl := plugintesting.SetupPlugin(t, topologySpreadFunc, &config.PodTopologySpreadArgs{DefaultingType: config.ListDefaulting}, snapshot)
p := pl.(*PodTopologySpread)
state := framework.NewCycleState()
preFilterStatus := p.PreFilter(context.Background(), state, tt.pod)
@ -1680,7 +1705,7 @@ func TestPreFilterDisabled(t *testing.T) {
nodeInfo := framework.NewNodeInfo()
node := v1.Node{}
nodeInfo.SetNode(&node)
p := plugintesting.SetupPlugin(t, New, &config.PodTopologySpreadArgs{DefaultingType: config.ListDefaulting}, cache.NewEmptySnapshot())
p := plugintesting.SetupPlugin(t, topologySpreadFunc, &config.PodTopologySpreadArgs{DefaultingType: config.ListDefaulting}, cache.NewEmptySnapshot())
cycleState := framework.NewCycleState()
gotStatus := p.(*PodTopologySpread).Filter(context.Background(), cycleState, pod, nodeInfo)
wantStatus := framework.AsStatus(fmt.Errorf(`reading "PreFilterPodTopologySpread" from cycleState: %w`, framework.ErrNotFound))

View File

@ -28,6 +28,7 @@ import (
"k8s.io/kubernetes/pkg/scheduler/apis/config/validation"
"k8s.io/kubernetes/pkg/scheduler/framework"
"k8s.io/kubernetes/pkg/scheduler/framework/parallelize"
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature"
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/names"
)
@ -61,6 +62,7 @@ type PodTopologySpread struct {
replicationCtrls corelisters.ReplicationControllerLister
replicaSets appslisters.ReplicaSetLister
statefulSets appslisters.StatefulSetLister
enableMinDomainsInPodTopologySpread bool
}
var _ framework.PreFilterPlugin = &PodTopologySpread{}
@ -80,7 +82,7 @@ func (pl *PodTopologySpread) Name() string {
}
// New initializes a new plugin and returns it.
func New(plArgs runtime.Object, h framework.Handle) (framework.Plugin, error) {
func New(plArgs runtime.Object, h framework.Handle, fts feature.Features) (framework.Plugin, error) {
if h.SnapshotSharedLister() == nil {
return nil, fmt.Errorf("SnapshotSharedlister is nil")
}
@ -95,6 +97,7 @@ func New(plArgs runtime.Object, h framework.Handle) (framework.Plugin, error) {
parallelizer: h.Parallelizer(),
sharedLister: h.SnapshotSharedLister(),
defaultConstraints: args.DefaultConstraints,
enableMinDomainsInPodTopologySpread: fts.EnableMinDomainsInPodTopologySpread,
}
if args.DefaultingType == config.SystemDefaulting {
pl.defaultConstraints = systemDefaultConstraints
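
Since New now takes feature.Features, callers that only hold the two-argument factory signature wrap it through the runtime adapter. A minimal sketch of enabling the gate in a test, assuming the same helpers the test files below use:

fts := feature.Features{EnableMinDomainsInPodTopologySpread: true}
factory := frameworkruntime.FactoryAdapter(fts, New)
// snapshot is a *cache.Snapshot prepared from the test's pods and nodes.
pl := plugintesting.SetupPlugin(t, factory, &config.PodTopologySpreadArgs{DefaultingType: config.ListDefaulting}, snapshot)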

View File

@ -59,7 +59,7 @@ func (s *preScoreState) Clone() framework.StateData {
func (pl *PodTopologySpread) initPreScoreState(s *preScoreState, pod *v1.Pod, filteredNodes []*v1.Node, requireAllTopologies bool) error {
var err error
if len(pod.Spec.TopologySpreadConstraints) > 0 {
s.Constraints, err = filterTopologySpreadConstraints(pod.Spec.TopologySpreadConstraints, v1.ScheduleAnyway)
s.Constraints, err = filterTopologySpreadConstraints(pod.Spec.TopologySpreadConstraints, v1.ScheduleAnyway, pl.enableMinDomainsInPodTopologySpread)
if err != nil {
return fmt.Errorf("obtaining pod's soft topology spread constraints: %w", err)
}
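
Every expected constraint in the tests above carries MinDomains: 1, which matches the documented default for a nil field. A plausible sketch of the normalization inside filterTopologySpreadConstraints — its body is not shown in this diff, so the surrounding names are assumptions:

// Default to 1 (the documented nil behavior) and honor the field only
// when the feature gate is on.
minDomains := int32(1)
if enableMinDomainsInPodTopologySpread && tsc.MinDomains != nil {
	minDomains = *tsc.MinDomains
}
result = append(result, topologySpreadConstraint{
	MaxSkew:     tsc.MaxSkew,
	TopologyKey: tsc.TopologyKey,
	Selector:    selector,
	MinDomains:  minDomains,
})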

View File

@ -30,6 +30,7 @@ import (
"k8s.io/client-go/kubernetes/fake"
"k8s.io/kubernetes/pkg/scheduler/apis/config"
"k8s.io/kubernetes/pkg/scheduler/framework"
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature"
plugintesting "k8s.io/kubernetes/pkg/scheduler/framework/plugins/testing"
frameworkruntime "k8s.io/kubernetes/pkg/scheduler/framework/runtime"
"k8s.io/kubernetes/pkg/scheduler/internal/cache"
@ -37,6 +38,8 @@ import (
"k8s.io/utils/pointer"
)
var podTopologySpreadFunc = frameworkruntime.FactoryAdapter(feature.Features{}, New)
func TestPreScoreStateEmptyNodes(t *testing.T) {
tests := []struct {
name string
@ -66,11 +69,13 @@ func TestPreScoreStateEmptyNodes(t *testing.T) {
MaxSkew: 1,
TopologyKey: "zone",
Selector: mustConvertLabelSelectorAsSelector(t, st.MakeLabelSelector().Exists("foo").Obj()),
MinDomains: 1,
},
{
MaxSkew: 1,
TopologyKey: v1.LabelHostname,
Selector: mustConvertLabelSelectorAsSelector(t, st.MakeLabelSelector().Exists("foo").Obj()),
MinDomains: 1,
},
},
IgnoredNodes: sets.NewString(),
@ -101,11 +106,13 @@ func TestPreScoreStateEmptyNodes(t *testing.T) {
MaxSkew: 1,
TopologyKey: "zone",
Selector: mustConvertLabelSelectorAsSelector(t, st.MakeLabelSelector().Exists("foo").Obj()),
MinDomains: 1,
},
{
MaxSkew: 1,
TopologyKey: v1.LabelHostname,
Selector: mustConvertLabelSelectorAsSelector(t, st.MakeLabelSelector().Exists("bar").Obj()),
MinDomains: 1,
},
},
IgnoredNodes: sets.NewString("node-x"),
@ -137,11 +144,13 @@ func TestPreScoreStateEmptyNodes(t *testing.T) {
MaxSkew: 3,
TopologyKey: v1.LabelHostname,
Selector: mustConvertLabelSelectorAsSelector(t, st.MakeLabelSelector().Exists("foo").Obj()),
MinDomains: 1,
},
{
MaxSkew: 5,
TopologyKey: v1.LabelTopologyZone,
Selector: mustConvertLabelSelectorAsSelector(t, st.MakeLabelSelector().Exists("foo").Obj()),
MinDomains: 1,
},
},
IgnoredNodes: sets.NewString(),
@ -175,11 +184,13 @@ func TestPreScoreStateEmptyNodes(t *testing.T) {
MaxSkew: 1,
TopologyKey: v1.LabelHostname,
Selector: mustConvertLabelSelectorAsSelector(t, st.MakeLabelSelector().Exists("foo").Obj()),
MinDomains: 1,
},
{
MaxSkew: 2,
TopologyKey: "planet",
Selector: mustConvertLabelSelectorAsSelector(t, st.MakeLabelSelector().Exists("foo").Obj()),
MinDomains: 1,
},
},
IgnoredNodes: sets.NewString(),
@ -232,6 +243,7 @@ func TestPreScoreStateEmptyNodes(t *testing.T) {
MaxSkew: 2,
TopologyKey: "planet",
Selector: mustConvertLabelSelectorAsSelector(t, st.MakeLabelSelector().Label("baz", "sup").Obj()),
MinDomains: 1,
},
},
IgnoredNodes: sets.NewString(),
@ -253,7 +265,7 @@ func TestPreScoreStateEmptyNodes(t *testing.T) {
if err != nil {
t.Fatalf("Failed creating framework runtime: %v", err)
}
pl, err := New(&tt.config, f)
pl, err := New(&tt.config, f, feature.Features{})
if err != nil {
t.Fatalf("Failed creating plugin: %v", err)
}
@ -732,7 +744,7 @@ func TestPodTopologySpreadScore(t *testing.T) {
allNodes := append([]*v1.Node{}, tt.nodes...)
allNodes = append(allNodes, tt.failedNodes...)
state := framework.NewCycleState()
pl := plugintesting.SetupPluginWithInformers(ctx, t, New, &config.PodTopologySpreadArgs{DefaultingType: config.SystemDefaulting}, cache.NewSnapshot(tt.existingPods, allNodes), tt.objs)
pl := plugintesting.SetupPluginWithInformers(ctx, t, podTopologySpreadFunc, &config.PodTopologySpreadArgs{DefaultingType: config.SystemDefaulting}, cache.NewSnapshot(tt.existingPods, allNodes), tt.objs)
p := pl.(*PodTopologySpread)
status := p.PreScore(context.Background(), state, tt.pod, tt.nodes)
@ -802,7 +814,7 @@ func BenchmarkTestPodTopologySpreadScore(b *testing.B) {
b.Run(tt.name, func(b *testing.B) {
existingPods, allNodes, filteredNodes := st.MakeNodesAndPodsForEvenPodsSpread(tt.pod.Labels, tt.existingPodsNum, tt.allNodesNum, tt.filteredNodesNum)
state := framework.NewCycleState()
pl := plugintesting.SetupPlugin(b, New, &config.PodTopologySpreadArgs{DefaultingType: config.ListDefaulting}, cache.NewSnapshot(existingPods, allNodes))
pl := plugintesting.SetupPlugin(b, podTopologySpreadFunc, &config.PodTopologySpreadArgs{DefaultingType: config.ListDefaulting}, cache.NewSnapshot(existingPods, allNodes))
p := pl.(*PodTopologySpread)
status := p.PreScore(context.Background(), state, tt.pod, filteredNodes)
@ -879,7 +891,7 @@ func BenchmarkTestDefaultEvenPodsSpreadPriority(b *testing.B) {
if err != nil {
b.Fatalf("Failed creating framework runtime: %v", err)
}
pl, err := New(&config.PodTopologySpreadArgs{DefaultingType: config.SystemDefaulting}, f)
pl, err := New(&config.PodTopologySpreadArgs{DefaultingType: config.SystemDefaulting}, f, feature.Features{})
if err != nil {
b.Fatalf("Failed creating plugin: %v", err)
}

View File

@ -50,6 +50,7 @@ func NewInTreeRegistry() runtime.Registry {
EnableReadWriteOncePod: feature.DefaultFeatureGate.Enabled(features.ReadWriteOncePod),
EnableVolumeCapacityPriority: feature.DefaultFeatureGate.Enabled(features.VolumeCapacityPriority),
EnableCSIStorageCapacity: feature.DefaultFeatureGate.Enabled(features.CSIStorageCapacity),
EnableMinDomainsInPodTopologySpread: feature.DefaultFeatureGate.Enabled(features.MinDomainsInPodTopologySpread),
}
return runtime.Registry{
@ -59,7 +60,7 @@ func NewInTreeRegistry() runtime.Registry {
nodename.Name: nodename.New,
nodeports.Name: nodeports.New,
nodeaffinity.Name: nodeaffinity.New,
podtopologyspread.Name: podtopologyspread.New,
podtopologyspread.Name: runtime.FactoryAdapter(fts, podtopologyspread.New),
nodeunschedulable.Name: nodeunschedulable.New,
noderesources.Name: runtime.FactoryAdapter(fts, noderesources.NewFit),
noderesources.BalancedAllocationName: runtime.FactoryAdapter(fts, noderesources.NewBalancedAllocation),
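
For context, FactoryAdapter is essentially a closure that captures the resolved feature gates so a three-argument factory like podtopologyspread.New still satisfies the two-argument PluginFactory the registry stores — roughly:

// Simplified sketch; see the frameworkruntime package for the real adapter.
func FactoryAdapter(fts feature.Features,
	withFts func(runtime.Object, framework.Handle, feature.Features) (framework.Plugin, error),
) func(runtime.Object, framework.Handle) (framework.Plugin, error) {
	return func(plArgs runtime.Object, h framework.Handle) (framework.Plugin, error) {
		return withFts(plArgs, h, fts)
	}
}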

View File

@ -66,6 +66,8 @@ import (
schedutil "k8s.io/kubernetes/pkg/scheduler/util"
)
var podTopologySpreadFunc = frameworkruntime.FactoryAdapter(feature.Features{}, podtopologyspread.New)
func podWithID(id, desiredHost string) *v1.Pod {
return &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
@ -2096,7 +2098,7 @@ func TestSchedulerSchedulePod(t *testing.T) {
st.RegisterQueueSortPlugin(queuesort.Name, queuesort.New),
st.RegisterPluginAsExtensions(
podtopologyspread.Name,
podtopologyspread.New,
podTopologySpreadFunc,
"PreFilter",
"Filter",
),
@ -2143,7 +2145,7 @@ func TestSchedulerSchedulePod(t *testing.T) {
st.RegisterQueueSortPlugin(queuesort.Name, queuesort.New),
st.RegisterPluginAsExtensions(
podtopologyspread.Name,
podtopologyspread.New,
podTopologySpreadFunc,
"PreFilter",
"Filter",
),

File diff suppressed because it is too large

View File

@ -5377,15 +5377,18 @@ message TopologySpreadConstraint {
// MaxSkew describes the degree to which pods may be unevenly distributed.
// When `whenUnsatisfiable=DoNotSchedule`, it is the maximum permitted difference
// between the number of matching pods in the target topology and the global minimum.
// The global minimum is the minimum number of matching pods in an eligible domain
// or zero if the number of eligible domains is less than MinDomains.
// For example, in a 3-zone cluster, MaxSkew is set to 1, and pods with the same
// labelSelector spread as 1/1/0:
// labelSelector spread as 2/2/1:
// In this case, the global minimum is 1.
// +-------+-------+-------+
// | zone1 | zone2 | zone3 |
// +-------+-------+-------+
// | P | P | |
// | P P | P P | P |
// +-------+-------+-------+
// - if MaxSkew is 1, incoming pod can only be scheduled to zone3 to become 1/1/1;
// scheduling it onto zone1(zone2) would make the ActualSkew(2-0) on zone1(zone2)
// - if MaxSkew is 1, incoming pod can only be scheduled to zone3 to become 2/2/2;
// scheduling it onto zone1(zone2) would make the ActualSkew(3-1) on zone1(zone2)
// violate MaxSkew(1).
// - if MaxSkew is 2, incoming pod can be scheduled onto any zone.
// When `whenUnsatisfiable=ScheduleAnyway`, it is used to give higher precedence
@ -5397,6 +5400,10 @@ message TopologySpreadConstraint {
// and identical values are considered to be in the same topology.
// We consider each <key, value> as a "bucket", and try to put balanced number
// of pods into each bucket.
// We define a domain as a particular instance of a topology.
// Also, we define an eligible domain as a domain whose nodes match the node selector.
// e.g., if TopologyKey is "kubernetes.io/hostname", each Node is a domain of that topology.
// And if TopologyKey is "topology.kubernetes.io/zone", each zone is a domain of that topology.
// It's a required field.
optional string topologyKey = 2;
@ -5428,6 +5435,33 @@ message TopologySpreadConstraint {
// in their corresponding topology domain.
// +optional
optional k8s.io.apimachinery.pkg.apis.meta.v1.LabelSelector labelSelector = 4;
// MinDomains indicates a minimum number of eligible domains.
// When the number of eligible domains with matching topology keys is less than minDomains,
// Pod Topology Spread treats "global minimum" as 0, and then the calculation of Skew is performed.
// When the number of eligible domains with matching topology keys is equal to or greater than minDomains,
// this value has no effect on scheduling.
// As a result, when the number of eligible domains is less than minDomains,
// the scheduler won't schedule more than maxSkew Pods to those domains.
// If value is nil, the constraint behaves as if MinDomains is equal to 1.
// Valid values are integers greater than 0.
// When value is not nil, WhenUnsatisfiable must be DoNotSchedule.
//
// For example, in a 3-zone cluster, MaxSkew is set to 2, MinDomains is set to 5 and pods with the same
// labelSelector spread as 2/2/2:
// +-------+-------+-------+
// | zone1 | zone2 | zone3 |
// +-------+-------+-------+
// | P P | P P | P P |
// +-------+-------+-------+
// The number of domains is less than 5 (MinDomains), so the "global minimum" is treated as 0.
// In this situation, a new pod with the same labelSelector cannot be scheduled,
// because the computed skew will be 3 (3 - 0) if the new pod is scheduled to any of the
// three zones, which would violate MaxSkew.
//
// This is an alpha field and requires enabling MinDomainsInPodTopologySpread feature gate.
// +optional
optional int32 minDomains = 5;
}
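
The arithmetic the comment describes is compact enough to spell out. A self-contained sketch (hypothetical helper, not code from this commit):

// skew returns the skew that would result from placing the incoming pod
// into podsPerDomain[candidate]. Per the comment above, the global minimum
// collapses to 0 when there are fewer eligible domains than minDomains.
func skew(podsPerDomain []int32, candidate int, minDomains int32) int32 {
	globalMin := int32(0)
	if int32(len(podsPerDomain)) >= minDomains {
		globalMin = podsPerDomain[0]
		for _, n := range podsPerDomain[1:] {
			if n < globalMin {
				globalMin = n
			}
		}
	}
	return podsPerDomain[candidate] + 1 - globalMin // +1 counts the incoming pod
}

For the 2/2/2 spread with MinDomains=5 above, only three domains are eligible, so globalMin is 0 and skew is 3 for every zone; with MaxSkew=2 and DoNotSchedule, all three placements are rejected.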
// TypedLocalObjectReference contains enough information to let you locate the

View File

@ -3339,15 +3339,18 @@ type TopologySpreadConstraint struct {
// MaxSkew describes the degree to which pods may be unevenly distributed.
// When `whenUnsatisfiable=DoNotSchedule`, it is the maximum permitted difference
// between the number of matching pods in the target topology and the global minimum.
// The global minimum is the minimum number of matching pods in an eligible domain
// or zero if the number of eligible domains is less than MinDomains.
// For example, in a 3-zone cluster, MaxSkew is set to 1, and pods with the same
// labelSelector spread as 1/1/0:
// labelSelector spread as 2/2/1:
// In this case, the global minimum is 1.
// +-------+-------+-------+
// | zone1 | zone2 | zone3 |
// +-------+-------+-------+
// | P | P | |
// | P P | P P | P |
// +-------+-------+-------+
// - if MaxSkew is 1, incoming pod can only be scheduled to zone3 to become 1/1/1;
// scheduling it onto zone1(zone2) would make the ActualSkew(2-0) on zone1(zone2)
// - if MaxSkew is 1, incoming pod can only be scheduled to zone3 to become 2/2/2;
// scheduling it onto zone1(zone2) would make the ActualSkew(3-1) on zone1(zone2)
// violate MaxSkew(1).
// - if MaxSkew is 2, incoming pod can be scheduled onto any zone.
// When `whenUnsatisfiable=ScheduleAnyway`, it is used to give higher precedence
@ -3358,6 +3361,10 @@ type TopologySpreadConstraint struct {
// and identical values are considered to be in the same topology.
// We consider each <key, value> as a "bucket", and try to put balanced number
// of pods into each bucket.
// We define a domain as a particular instance of a topology.
// Also, we define an eligible domain as a domain whose nodes match the node selector.
// e.g., if TopologyKey is "kubernetes.io/hostname", each Node is a domain of that topology.
// And if TopologyKey is "topology.kubernetes.io/zone", each zone is a domain of that topology.
// It's a required field.
TopologyKey string `json:"topologyKey" protobuf:"bytes,2,opt,name=topologyKey"`
// WhenUnsatisfiable indicates how to deal with a pod if it doesn't satisfy
@ -3387,6 +3394,32 @@ type TopologySpreadConstraint struct {
// in their corresponding topology domain.
// +optional
LabelSelector *metav1.LabelSelector `json:"labelSelector,omitempty" protobuf:"bytes,4,opt,name=labelSelector"`
// MinDomains indicates a minimum number of eligible domains.
// When the number of eligible domains with matching topology keys is less than minDomains,
// Pod Topology Spread treats "global minimum" as 0, and then the calculation of Skew is performed.
// When the number of eligible domains with matching topology keys is equal to or greater than minDomains,
// this value has no effect on scheduling.
// As a result, when the number of eligible domains is less than minDomains,
// the scheduler won't schedule more than maxSkew Pods to those domains.
// If value is nil, the constraint behaves as if MinDomains is equal to 1.
// Valid values are integers greater than 0.
// When value is not nil, WhenUnsatisfiable must be DoNotSchedule.
//
// For example, in a 3-zone cluster, MaxSkew is set to 2, MinDomains is set to 5 and pods with the same
// labelSelector spread as 2/2/2:
// +-------+-------+-------+
// | zone1 | zone2 | zone3 |
// +-------+-------+-------+
// | P P | P P | P P |
// +-------+-------+-------+
// The number of domains is less than 5 (MinDomains), so the "global minimum" is treated as 0.
// In this situation, a new pod with the same labelSelector cannot be scheduled,
// because the computed skew will be 3 (3 - 0) if the new pod is scheduled to any of the
// three zones, which would violate MaxSkew.
//
// This is an alpha field and requires enabling MinDomainsInPodTopologySpread feature gate.
// +optional
MinDomains *int32 `json:"minDomains,omitempty" protobuf:"varint,5,opt,name=minDomains"`
}
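
Because MinDomains is a *int32, callers set it through a pointer helper. A hedged usage sketch (the pod variable and label values are assumptions):

constraint := v1.TopologySpreadConstraint{
	MaxSkew:           1,
	TopologyKey:       "topology.kubernetes.io/zone",
	WhenUnsatisfiable: v1.DoNotSchedule, // required whenever MinDomains is set
	LabelSelector:     &metav1.LabelSelector{MatchLabels: map[string]string{"app": "web"}},
	MinDomains:        pointer.Int32(3), // k8s.io/utils/pointer
}
pod.Spec.TopologySpreadConstraints = append(pod.Spec.TopologySpreadConstraints, constraint)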
const (

View File

@ -2400,10 +2400,11 @@ func (TopologySelectorTerm) SwaggerDoc() map[string]string {
var map_TopologySpreadConstraint = map[string]string{
"": "TopologySpreadConstraint specifies how to spread matching pods among the given topology.",
"maxSkew": "MaxSkew describes the degree to which pods may be unevenly distributed. When `whenUnsatisfiable=DoNotSchedule`, it is the maximum permitted difference between the number of matching pods in the target topology and the global minimum. For example, in a 3-zone cluster, MaxSkew is set to 1, and pods with the same labelSelector spread as 1/1/0: ",
"topologyKey": "TopologyKey is the key of node labels. Nodes that have a label with this key and identical values are considered to be in the same topology. We consider each <key, value> as a \"bucket\", and try to put balanced number of pods into each bucket. It's a required field.",
"maxSkew": "MaxSkew describes the degree to which pods may be unevenly distributed. When `whenUnsatisfiable=DoNotSchedule`, it is the maximum permitted difference between the number of matching pods in the target topology and the global minimum. The global minimum is the minimum number of matching pods in an eligible domain or zero if the number of eligible domains is less than MinDomains. For example, in a 3-zone cluster, MaxSkew is set to 1, and pods with the same labelSelector spread as 2/2/1: In this case, the global minimum is 1. ",
"topologyKey": "TopologyKey is the key of node labels. Nodes that have a label with this key and identical values are considered to be in the same topology. We consider each <key, value> as a \"bucket\", and try to put balanced number of pods into each bucket. We define a domain as a particular instance of a topology. Also, we define an eligible domain as a domain whose nodes match the node selector. e.g. If TopologyKey is \"kubernetes.io/hostname\", each Node is a domain of that topology. And, if TopologyKey is \"topology.kubernetes.io/zone\", each zone is a domain of that topology. It's a required field.",
"whenUnsatisfiable": "WhenUnsatisfiable indicates how to deal with a pod if it doesn't satisfy the spread constraint. - DoNotSchedule (default) tells the scheduler not to schedule it. - ScheduleAnyway tells the scheduler to schedule the pod in any location,\n but giving higher precedence to topologies that would help reduce the\n skew.\nA constraint is considered \"Unsatisfiable\" for an incoming pod if and only if every possible node assignment for that pod would violate \"MaxSkew\" on some topology. For example, in a 3-zone cluster, MaxSkew is set to 1, and pods with the same labelSelector spread as 3/1/1: ",
"labelSelector": "LabelSelector is used to find matching pods. Pods that match this label selector are counted to determine the number of pods in their corresponding topology domain.",
"minDomains": "MinDomains indicates a minimum number of eligible domains. When the number of eligible domains with matching topology keys is less than minDomains, Pod Topology Spread treats \"global minimum\" as 0, and then the calculation of Skew is performed. And when the number of eligible domains with matching topology keys equals or greater than minDomains, this value has no effect on scheduling. As a result, when the number of eligible domains is less than minDomains, scheduler won't schedule more than maxSkew Pods to those domains. If value is nil, the constraint behaves as if MinDomains is equal to 1. Valid values are integers greater than 0. When value is not nil, WhenUnsatisfiable must be DoNotSchedule.\n\nFor example, in a 3-zone cluster, MaxSkew is set to 2, MinDomains is set to 5 and pods with the same labelSelector spread as 2/2/2: ",
}
func (TopologySpreadConstraint) SwaggerDoc() map[string]string {

View File

@ -5644,6 +5644,11 @@ func (in *TopologySpreadConstraint) DeepCopyInto(out *TopologySpreadConstraint)
*out = new(metav1.LabelSelector)
(*in).DeepCopyInto(*out)
}
if in.MinDomains != nil {
in, out := &in.MinDomains, &out.MinDomains
*out = new(int32)
**out = **in
}
return
}
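
The fresh allocation is what makes this a deep copy: duplicating only the pointer would let two objects mutate one shared value. A tiny illustration with assumed values:

a := TopologySpreadConstraint{MinDomains: pointer.Int32(2)}
b := a            // shallow struct copy: b.MinDomains aliases a.MinDomains
*b.MinDomains = 5 // silently changes a as well
c := a.DeepCopy() // the branch above gives c its own int32
*c.MinDomains = 7 // a stays at 5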

View File

@ -1616,7 +1616,8 @@
]
}
]
}
},
"minDomains": 5
}
],
"setHostnameAsFQDN": true,

View File

@ -849,6 +849,7 @@ spec:
matchLabels:
matchLabelsKey: matchLabelsValue
maxSkew: 1
minDomains: 5
topologyKey: topologyKeyValue
whenUnsatisfiable: whenUnsatisfiableValue
volumes:

View File

@ -1617,7 +1617,8 @@
]
}
]
}
},
"minDomains": 5
}
],
"setHostnameAsFQDN": true,

View File

@ -857,6 +857,7 @@ spec:
matchLabels:
matchLabelsKey: matchLabelsValue
maxSkew: 1
minDomains: 5
topologyKey: topologyKeyValue
whenUnsatisfiable: whenUnsatisfiableValue
volumes:

View File

@ -1618,7 +1618,8 @@
]
}
]
}
},
"minDomains": 5
}
],
"setHostnameAsFQDN": true,

View File

@ -849,6 +849,7 @@ spec:
matchLabels:
matchLabelsKey: matchLabelsValue
maxSkew: 1
minDomains: 5
topologyKey: topologyKeyValue
whenUnsatisfiable: whenUnsatisfiableValue
volumes:

View File

@ -1617,7 +1617,8 @@
]
}
]
}
},
"minDomains": 5
}
],
"setHostnameAsFQDN": true,

View File

@ -855,6 +855,7 @@ spec:
matchLabels:
matchLabelsKey: matchLabelsValue
maxSkew: 1
minDomains: 5
topologyKey: topologyKeyValue
whenUnsatisfiable: whenUnsatisfiableValue
volumes:

View File

@ -1617,7 +1617,8 @@
]
}
]
}
},
"minDomains": 5
}
],
"setHostnameAsFQDN": true,

View File

@ -859,6 +859,7 @@ spec:
matchLabels:
matchLabelsKey: matchLabelsValue
maxSkew: 1
minDomains: 5
topologyKey: topologyKeyValue
whenUnsatisfiable: whenUnsatisfiableValue
volumes:

View File

@ -1617,7 +1617,8 @@
]
}
]
}
},
"minDomains": 5
}
],
"setHostnameAsFQDN": true,

View File

@ -855,6 +855,7 @@ spec:
matchLabels:
matchLabelsKey: matchLabelsValue
maxSkew: 1
minDomains: 5
topologyKey: topologyKeyValue
whenUnsatisfiable: whenUnsatisfiableValue
volumes:

View File

@ -1616,7 +1616,8 @@
]
}
]
}
},
"minDomains": 5
}
],
"setHostnameAsFQDN": true,

View File

@ -849,6 +849,7 @@ spec:
matchLabels:
matchLabelsKey: matchLabelsValue
maxSkew: 1
minDomains: 5
topologyKey: topologyKeyValue
whenUnsatisfiable: whenUnsatisfiableValue
volumes:

View File

@ -1617,7 +1617,8 @@
]
}
]
}
},
"minDomains": 5
}
],
"setHostnameAsFQDN": true,

View File

@ -857,6 +857,7 @@ spec:
matchLabels:
matchLabelsKey: matchLabelsValue
maxSkew: 1
minDomains: 5
topologyKey: topologyKeyValue
whenUnsatisfiable: whenUnsatisfiableValue
volumes:

View File

@ -1618,7 +1618,8 @@
]
}
]
}
},
"minDomains": 5
}
],
"setHostnameAsFQDN": true,

View File

@ -849,6 +849,7 @@ spec:
matchLabels:
matchLabelsKey: matchLabelsValue
maxSkew: 1
minDomains: 5
topologyKey: topologyKeyValue
whenUnsatisfiable: whenUnsatisfiableValue
volumes:

View File

@ -1617,7 +1617,8 @@
]
}
]
}
},
"minDomains": 5
}
],
"setHostnameAsFQDN": true,

View File

@ -855,6 +855,7 @@ spec:
matchLabels:
matchLabelsKey: matchLabelsValue
maxSkew: 1
minDomains: 5
topologyKey: topologyKeyValue
whenUnsatisfiable: whenUnsatisfiableValue
volumes:

View File

@ -1670,7 +1670,8 @@
]
}
]
}
},
"minDomains": 5
}
],
"setHostnameAsFQDN": true,

View File

@ -891,6 +891,7 @@ spec:
matchLabels:
matchLabelsKey: matchLabelsValue
maxSkew: 1
minDomains: 5
topologyKey: topologyKeyValue
whenUnsatisfiable: whenUnsatisfiableValue
volumes:

View File

@ -1621,7 +1621,8 @@
]
}
]
}
},
"minDomains": 5
}
],
"setHostnameAsFQDN": true,

View File

@ -854,6 +854,7 @@ spec:
matchLabels:
matchLabelsKey: matchLabelsValue
maxSkew: 1
minDomains: 5
topologyKey: topologyKeyValue
whenUnsatisfiable: whenUnsatisfiableValue
volumes:

View File

@ -1670,7 +1670,8 @@
]
}
]
}
},
"minDomains": 5
}
],
"setHostnameAsFQDN": true,

View File

@ -891,6 +891,7 @@ spec:
matchLabels:
matchLabelsKey: matchLabelsValue
maxSkew: 1
minDomains: 5
topologyKey: topologyKeyValue
whenUnsatisfiable: whenUnsatisfiableValue
volumes:

View File

@ -1665,7 +1665,8 @@
]
}
]
}
},
"minDomains": 5
}
],
"setHostnameAsFQDN": true,

View File

@ -888,6 +888,7 @@ template:
matchLabels:
matchLabelsKey: matchLabelsValue
maxSkew: 1
minDomains: 5
topologyKey: topologyKeyValue
whenUnsatisfiable: whenUnsatisfiableValue
volumes:

View File

@ -1557,7 +1557,8 @@
]
}
]
}
},
"minDomains": 5
}
],
"setHostnameAsFQDN": true,

View File

@ -804,6 +804,7 @@ spec:
matchLabels:
matchLabelsKey: matchLabelsValue
maxSkew: 1
minDomains: 5
topologyKey: topologyKeyValue
whenUnsatisfiable: whenUnsatisfiableValue
volumes:

View File

@ -1601,7 +1601,8 @@
]
}
]
}
},
"minDomains": 5
}
],
"setHostnameAsFQDN": true,

View File

@ -838,6 +838,7 @@ template:
matchLabels:
matchLabelsKey: matchLabelsValue
maxSkew: 1
minDomains: 5
topologyKey: topologyKeyValue
whenUnsatisfiable: whenUnsatisfiableValue
volumes:

View File

@ -1607,7 +1607,8 @@
]
}
]
}
},
"minDomains": 5
}
],
"setHostnameAsFQDN": true,

View File

@ -843,6 +843,7 @@ spec:
matchLabels:
matchLabelsKey: matchLabelsValue
maxSkew: 1
minDomains: 5
topologyKey: topologyKeyValue
whenUnsatisfiable: whenUnsatisfiableValue
volumes:

View File

@ -1616,7 +1616,8 @@
]
}
]
}
},
"minDomains": 5
}
],
"setHostnameAsFQDN": true,

View File

@ -849,6 +849,7 @@ spec:
matchLabels:
matchLabelsKey: matchLabelsValue
maxSkew: 1
minDomains: 5
topologyKey: topologyKeyValue
whenUnsatisfiable: whenUnsatisfiableValue
volumes:

View File

@ -1617,7 +1617,8 @@
]
}
]
}
},
"minDomains": 5
}
],
"setHostnameAsFQDN": true,

View File

@ -859,6 +859,7 @@ spec:
matchLabels:
matchLabelsKey: matchLabelsValue
maxSkew: 1
minDomains: 5
topologyKey: topologyKeyValue
whenUnsatisfiable: whenUnsatisfiableValue
volumes:

View File

@ -1618,7 +1618,8 @@
]
}
]
}
},
"minDomains": 5
}
],
"setHostnameAsFQDN": true,

View File

@ -849,6 +849,7 @@ spec:
matchLabels:
matchLabelsKey: matchLabelsValue
maxSkew: 1
minDomains: 5
topologyKey: topologyKeyValue
whenUnsatisfiable: whenUnsatisfiableValue
volumes:

View File

@ -30,6 +30,7 @@ type TopologySpreadConstraintApplyConfiguration struct {
TopologyKey *string `json:"topologyKey,omitempty"`
WhenUnsatisfiable *v1.UnsatisfiableConstraintAction `json:"whenUnsatisfiable,omitempty"`
LabelSelector *metav1.LabelSelectorApplyConfiguration `json:"labelSelector,omitempty"`
MinDomains *int32 `json:"minDomains,omitempty"`
}
// TopologySpreadConstraintApplyConfiguration constructs an declarative configuration of the TopologySpreadConstraint type for use with
@ -69,3 +70,11 @@ func (b *TopologySpreadConstraintApplyConfiguration) WithLabelSelector(value *me
b.LabelSelector = value
return b
}
// WithMinDomains sets the MinDomains field in the declarative configuration to the given value
// and returns the receiver, so that objects can be built by chaining "With" function invocations.
// If called multiple times, the MinDomains field is set to the value of the last call.
func (b *TopologySpreadConstraintApplyConfiguration) WithMinDomains(value int32) *TopologySpreadConstraintApplyConfiguration {
b.MinDomains = &value
return b
}
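
A sketch of the generated builder in use; the corev1apply constructor name follows client-go's apply-configuration conventions and is an assumption here:

tsc := corev1apply.TopologySpreadConstraint().
	WithMaxSkew(1).
	WithTopologyKey("topology.kubernetes.io/zone").
	WithWhenUnsatisfiable(corev1.DoNotSchedule).
	WithMinDomains(3) // last call wins if invoked repeatedly, per the comment above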

View File

@ -6844,6 +6844,9 @@ var schemaYAML = typed.YAMLObject(`types:
type:
scalar: numeric
default: 0
- name: minDomains
type:
scalar: numeric
- name: topologyKey
type:
scalar: string