Split CPUManager takeByTopology() into two different algorithms
The first implements the original algorithm, which packs CPUs onto NUMA nodes if more than one NUMA node is required to satisfy the allocation. The second distributes CPUs across NUMA nodes if they can't all fit into one. The "distributing" algorithm is currently a no-op and just returns an "unimplemented" error. A subsequent commit will add the logic to implement this algorithm according to KEP 2902:
https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/2902-cpumanager-distribute-cpus-policy-option

Signed-off-by: Kevin Klues <kklues@nvidia.com>
parent 0e7928edce
commit 462544d079
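For orientation before the diff, here is a minimal, self-contained Go sketch of the split described above. The CPUSet and CPUTopology types are simplified stand-ins (not the real cpuset/topology packages), the packed body is only a placeholder for the original algorithm, and the function taking a bool is a hypothetical simplification; only the shape of the dispatch mirrors the actual change in the policy_static.go hunk below.

// Hypothetical stand-alone sketch; the real code lives in pkg/kubelet/cm/cpumanager.
package main

import (
	"errors"
	"fmt"
)

// CPUSet and CPUTopology are simplified stand-ins for cpuset.CPUSet and
// topology.CPUTopology.
type CPUSet []int
type CPUTopology struct{ NumNUMANodes int }

// takeByTopologyNUMAPacked stands in for the original algorithm, which packs
// CPUs onto as few NUMA nodes as possible (placeholder body).
func takeByTopologyNUMAPacked(topo *CPUTopology, available CPUSet, numCPUs int) (CPUSet, error) {
	if numCPUs > len(available) {
		return nil, errors.New("not enough cpus available to satisfy request")
	}
	return available[:numCPUs], nil
}

// takeByTopologyNUMADistributed will spread CPUs across NUMA nodes per
// KEP 2902; this commit deliberately leaves it unimplemented.
func takeByTopologyNUMADistributed(topo *CPUTopology, available CPUSet, numCPUs int) (CPUSet, error) {
	return nil, errors.New("unimplemented")
}

// takeByTopology mirrors the new staticPolicy helper: choose the algorithm
// based on the distribute-cpus-across-numa policy option.
func takeByTopology(distributeAcrossNUMA bool, topo *CPUTopology, available CPUSet, numCPUs int) (CPUSet, error) {
	if distributeAcrossNUMA {
		return takeByTopologyNUMADistributed(topo, available, numCPUs)
	}
	return takeByTopologyNUMAPacked(topo, available, numCPUs)
}

func main() {
	topo := &CPUTopology{NumNUMANodes: 2}
	cpus, err := takeByTopology(false, topo, CPUSet{0, 1, 2, 3, 4, 5, 6, 7}, 4)
	fmt.Println(cpus, err) // packed path: [0 1 2 3] <nil>

	_, err = takeByTopology(true, topo, CPUSet{0, 1, 2, 3, 4, 5, 6, 7}, 4)
	fmt.Println(err) // distributed path: "unimplemented" until the KEP 2902 logic lands
}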
@@ -318,7 +318,7 @@ func (a *cpuAccumulator) isFailed() bool {
 	return a.numCPUsNeeded > a.details.CPUs().Size()
 }
 
-func takeByTopology(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int) (cpuset.CPUSet, error) {
+func takeByTopologyNUMAPacked(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int) (cpuset.CPUSet, error) {
 	acc := newCPUAccumulator(topo, availableCPUs, numCPUs)
 	if acc.isSatisfied() {
 		return acc.result, nil
@@ -358,3 +358,7 @@ func takeByTopology(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, num
 
 	return cpuset.NewCPUSet(), fmt.Errorf("failed to allocate cpus")
 }
+
+func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int) (cpuset.CPUSet, error) {
+	return cpuset.NewCPUSet(), fmt.Errorf("unimplemented")
+}
@@ -506,7 +506,7 @@ func TestCPUAccumulatorTake(t *testing.T) {
 	}
 }
 
-func TestTakeByTopology(t *testing.T) {
+func TestTakeByTopologyNUMAPacked(t *testing.T) {
 	testCases := []struct {
 		description   string
 		topo          *topology.CPUTopology
@@ -631,7 +631,7 @@ func TestTakeByTopology(t *testing.T) {
 
 	for _, tc := range testCases {
 		t.Run(tc.description, func(t *testing.T) {
-			result, err := takeByTopology(tc.topo, tc.availableCPUs, tc.numCPUs)
+			result, err := takeByTopologyNUMAPacked(tc.topo, tc.availableCPUs, tc.numCPUs)
 			if tc.expErr != "" && err.Error() != tc.expErr {
 				t.Errorf("expected error to be [%v] but it was [%v]", tc.expErr, err)
 			}
@@ -118,6 +118,13 @@ func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reserv
 
 	klog.InfoS("Static policy created with configuration", "options", opts)
 
+	policy := &staticPolicy{
+		topology:    topology,
+		affinity:    affinity,
+		cpusToReuse: make(map[string]cpuset.CPUSet),
+		options:     opts,
+	}
+
 	allCPUs := topology.CPUDetails.CPUs()
 	var reserved cpuset.CPUSet
 	if reservedCPUs.Size() > 0 {
@@ -128,7 +135,7 @@ func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reserv
 		//
 		// For example: Given a system with 8 CPUs available and HT enabled,
 		// if numReservedCPUs=2, then reserved={0,4}
-		reserved, _ = takeByTopology(topology, allCPUs, numReservedCPUs)
+		reserved, _ = policy.takeByTopology(allCPUs, numReservedCPUs)
 	}
 
 	if reserved.Size() != numReservedCPUs {
@@ -137,14 +144,9 @@ func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reserv
 	}
 
 	klog.InfoS("Reserved CPUs not available for exclusive assignment", "reservedSize", reserved.Size(), "reserved", reserved)
+	policy.reserved = reserved
 
-	return &staticPolicy{
-		topology:    topology,
-		reserved:    reserved,
-		affinity:    affinity,
-		cpusToReuse: make(map[string]cpuset.CPUSet),
-		options:     opts,
-	}, nil
+	return policy, nil
 }
 
 func (p *staticPolicy) Name() string {
@@ -318,7 +320,7 @@ func (p *staticPolicy) allocateCPUs(s state.State, numCPUs int, numaAffinity bit
 			numAlignedToAlloc = numCPUs
 		}
 
-		alignedCPUs, err := takeByTopology(p.topology, alignedCPUs, numAlignedToAlloc)
+		alignedCPUs, err := p.takeByTopology(alignedCPUs, numAlignedToAlloc)
 		if err != nil {
 			return cpuset.NewCPUSet(), err
 		}
@@ -327,7 +329,7 @@ func (p *staticPolicy) allocateCPUs(s state.State, numCPUs int, numaAffinity bit
 	}
 
 	// Get any remaining CPUs from what's leftover after attempting to grab aligned ones.
-	remainingCPUs, err := takeByTopology(p.topology, allocatableCPUs.Difference(result), numCPUs-result.Size())
+	remainingCPUs, err := p.takeByTopology(allocatableCPUs.Difference(result), numCPUs-result.Size())
 	if err != nil {
 		return cpuset.NewCPUSet(), err
 	}
@@ -381,6 +383,13 @@ func (p *staticPolicy) podGuaranteedCPUs(pod *v1.Pod) int {
 	return requestedByAppContainers
 }
 
+func (p *staticPolicy) takeByTopology(availableCPUs cpuset.CPUSet, numCPUs int) (cpuset.CPUSet, error) {
+	if p.options.DistributeCPUsAcrossNUMA {
+		return takeByTopologyNUMADistributed(p.topology, availableCPUs, numCPUs)
+	}
+	return takeByTopologyNUMAPacked(p.topology, availableCPUs, numCPUs)
+}
+
 func (p *staticPolicy) GetTopologyHints(s state.State, pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint {
 	// Get a count of how many guaranteed CPUs have been requested.
 	requested := p.guaranteedCPUs(pod, container)