Avoid undesirable allocation when device is associated with multiple NUMA Nodes

suppose there are two devices dev1 and dev2, each has NUMA Nodes associated as below:
  dev1: numa1
  dev2: numa1, numa2

and we request a device from numa2, currently filterByAffinity() will return
[], [dev1, dev2], [] if loop of available devices produce a sequence of [dev1, dev2],
that is is not desirable as what we truely expect is an allocation of dev2 from numa2.
This commit is contained in:
kikimo 2021-05-16 23:30:19 +08:00
parent 04b7977ef6
commit 2ef1f81076
2 changed files with 117 additions and 2 deletions

View File

@ -791,9 +791,23 @@ func (m *ManagerImpl) filterByAffinity(podUID, contName, resource string, availa
nodes = append(nodes, node)
}
// Sort the list of nodes by how many devices they contain.
// Sort the list of nodes by nodes from affinity set, nodes from none-affinity
// set, nodes without topology set and how many devices they contain.
sort.Slice(nodes, func(i, j int) bool {
return perNodeDevices[i].Len() < perNodeDevices[j].Len()
ni, nj := nodes[i], nodes[j]
// 1. node[i] from to affninity set
if hint.NUMANodeAffinity.IsSet(ni) {
return !hint.NUMANodeAffinity.IsSet(nj) || perNodeDevices[ni].Len() < perNodeDevices[nj].Len()
}
// 2. node[i] from none-affinity set
if ni != nodeWithoutTopology {
return !hint.NUMANodeAffinity.IsSet(nj) &&
(nj == nodeWithoutTopology || perNodeDevices[ni].Len() < perNodeDevices[nj].Len())
}
// 3. node[i] from node without topology set
return nj == nodeWithoutTopology && perNodeDevices[ni].Len() < perNodeDevices[nj].Len()
})
// Generate three sorted lists of devices. Devices in the first list come

View File

@ -40,6 +40,7 @@ import (
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
"k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/checkpoint"
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask"
"k8s.io/kubernetes/pkg/kubelet/config"
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
"k8s.io/kubernetes/pkg/kubelet/pluginmanager"
@ -669,6 +670,106 @@ type TestResource struct {
topology bool
}
func TestFilterByAffinity(t *testing.T) {
as := require.New(t)
allDevices := ResourceDeviceInstances{
"res1": map[string]pluginapi.Device{
"dev1": {
ID: "dev1",
Topology: &pluginapi.TopologyInfo{
Nodes: []*pluginapi.NUMANode{
{
ID: 1,
},
},
},
},
"dev2": {
ID: "dev2",
Topology: &pluginapi.TopologyInfo{
Nodes: []*pluginapi.NUMANode{
{
ID: 1,
},
{
ID: 2,
},
},
},
},
"dev3": {
ID: "dev3",
Topology: &pluginapi.TopologyInfo{
Nodes: []*pluginapi.NUMANode{
{
ID: 2,
},
},
},
},
"dev4": {
ID: "dev4",
Topology: &pluginapi.TopologyInfo{
Nodes: []*pluginapi.NUMANode{
{
ID: 2,
},
},
},
},
},
}
testManager := ManagerImpl{
topologyAffinityStore: newFakeTopologyManager(),
allDevices: allDevices,
}
testCases := []struct {
available sets.String
fromAffinityExpected sets.String
notFromAffinityExpected sets.String
withoutTopologyExpected sets.String
}{
{
available: sets.NewString("dev1", "dev2"),
fromAffinityExpected: sets.NewString("dev2"),
notFromAffinityExpected: sets.NewString("dev1"),
withoutTopologyExpected: sets.NewString(),
},
{
available: sets.NewString("dev1", "dev2", "dev3", "dev4"),
fromAffinityExpected: sets.NewString("dev2", "dev3", "dev4"),
notFromAffinityExpected: sets.NewString("dev1"),
withoutTopologyExpected: sets.NewString(),
},
}
for _, testCase := range testCases {
fromAffinity, notFromAffinity, withoutTopology := testManager.filterByAffinity("", "", "res1", testCase.available)
as.Truef(fromAffinity.Equal(testCase.fromAffinityExpected), "expect devices from affinity to be %v but got %v", testCase.fromAffinityExpected, fromAffinity)
as.Truef(notFromAffinity.Equal(testCase.notFromAffinityExpected), "expect devices not from affinity to be %v but got %v", testCase.notFromAffinityExpected, notFromAffinity)
as.Truef(withoutTopology.Equal(testCase.withoutTopologyExpected), "expect devices without topology to be %v but got %v", testCase.notFromAffinityExpected, notFromAffinity)
}
}
type fakeTopologyManager struct {
topologymanager.Manager
}
func newFakeTopologyManager() topologymanager.Manager {
return &fakeTopologyManager{}
}
func (m *fakeTopologyManager) GetAffinity(podUID string, containerName string) topologymanager.TopologyHint {
// return hint with numa2 set
affinity, _ := bitmask.NewBitMask(2)
return topologymanager.TopologyHint{
NUMANodeAffinity: affinity,
Preferred: true,
}
}
func TestPodContainerDeviceAllocation(t *testing.T) {
res1 := TestResource{
resourceName: "domain1.com/resource1",