Add CDI devices to device plugin API
This change adds CDI device IDs to the ContainerAllocateResponse in the device plugin API. This allows a device plugin to specify CDI devices by their unique fully-qualified CDI device names using the related field in the CRI specification. Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
@@ -221,6 +221,13 @@ const (
|
||||
// (e.g. in a Deployment), which is the historical default.
|
||||
DefaultHostNetworkHostPortsInPodTemplates featuregate.Feature = "DefaultHostNetworkHostPortsInPodTemplates"
|
||||
|
||||
// owner: @elezar
|
||||
// kep: http://kep.k8s.io/4009
|
||||
// alpha: v1.28
|
||||
//
|
||||
// Add support for CDI Device IDs in the Device Plugin API.
|
||||
DevicePluginCDIDevices featuregate.Feature = "DevicePluginCDIDevices"
|
||||
|
||||
// owner: @andrewsykim
|
||||
// alpha: v1.22
|
||||
//
|
||||
@@ -908,6 +915,8 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS
|
||||
|
||||
DisableKubeletCloudCredentialProviders: {Default: false, PreRelease: featuregate.Alpha},
|
||||
|
||||
DevicePluginCDIDevices: {Default: false, PreRelease: featuregate.Alpha},
|
||||
|
||||
DownwardAPIHugePages: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in v1.29
|
||||
|
||||
DynamicResourceAllocation: {Default: false, PreRelease: featuregate.Alpha},
|
||||
|
||||
@@ -673,6 +673,7 @@ func (cm *containerManagerImpl) GetResources(pod *v1.Pod, container *v1.Containe
|
||||
opts.Mounts = append(opts.Mounts, devOpts.Mounts...)
|
||||
opts.Envs = append(opts.Envs, devOpts.Envs...)
|
||||
opts.Annotations = append(opts.Annotations, devOpts.Annotations...)
|
||||
opts.CDIDevices = append(opts.CDIDevices, devOpts.CDIDevices...)
|
||||
return opts, nil
|
||||
}
|
||||
|
||||
|
||||
@@ -573,9 +573,10 @@ func constructDevices(devices []string) checkpoint.DevicesPerNUMA {
|
||||
|
||||
// containerAllocateResponseBuilder is a helper to build a ContainerAllocateResponse
|
||||
type containerAllocateResponseBuilder struct {
	// devices holds the device entries to place in the response, as a map of path pairs.
	// NOTE(review): presumably keyed by host path with the container path as value — confirm against Build.
	devices map[string]string
	// mounts holds the mount entries to place in the response, as a map of path pairs.
	// NOTE(review): presumably keyed by host path with the container path as value — confirm against Build.
	mounts map[string]string
	// envs holds the environment variables to place in the response (name -> value).
	envs map[string]string
	// cdiDevices holds the CDI device names; Build turns each name into a pluginapi.CDIDevice.
	cdiDevices []string
}
|
||||
|
||||
// containerAllocateResponseBuilderOption defines a functional option for a containerAllocateResponseBuilder
|
||||
@@ -602,6 +603,13 @@ func withEnvs(envs map[string]string) containerAllocateResponseBuilderOption {
|
||||
}
|
||||
}
|
||||
|
||||
// withCDIDevices sets the cdiDevices for the containerAllocateResponseBuilder
|
||||
func withCDIDevices(cdiDevices ...string) containerAllocateResponseBuilderOption {
|
||||
return func(b *containerAllocateResponseBuilder) {
|
||||
b.cdiDevices = cdiDevices
|
||||
}
|
||||
}
|
||||
|
||||
// newContainerAllocateResponse creates a ContainerAllocateResponse with the given options.
|
||||
func newContainerAllocateResponse(opts ...containerAllocateResponseBuilderOption) *pluginapi.ContainerAllocateResponse {
|
||||
b := &containerAllocateResponseBuilder{}
|
||||
@@ -633,6 +641,16 @@ func (b *containerAllocateResponseBuilder) Build() *pluginapi.ContainerAllocateR
|
||||
for k, v := range b.envs {
|
||||
resp.Envs[k] = v
|
||||
}
|
||||
|
||||
var cdiDevices []*pluginapi.CDIDevice
|
||||
for _, dev := range b.cdiDevices {
|
||||
cdiDevice := pluginapi.CDIDevice{
|
||||
Name: dev,
|
||||
}
|
||||
cdiDevices = append(cdiDevices, &cdiDevice)
|
||||
}
|
||||
resp.CDIDevices = cdiDevices
|
||||
|
||||
return resp
|
||||
}
|
||||
|
||||
@@ -660,6 +678,7 @@ func TestCheckpoint(t *testing.T) {
|
||||
newContainerAllocateResponse(
|
||||
withDevices(map[string]string{"/dev/r1dev1": "/dev/r1dev1", "/dev/r1dev2": "/dev/r1dev2"}),
|
||||
withMounts(map[string]string{"/home/r1lib1": "/usr/r1lib1"}),
|
||||
withCDIDevices("domain1.com/resource1=dev1", "domain1.com/resource1=dev2"),
|
||||
),
|
||||
)
|
||||
testManager.podDevices.insert("pod1", "con1", resourceName2,
|
||||
|
||||
@@ -21,9 +21,13 @@ import (
|
||||
|
||||
"k8s.io/klog/v2"
|
||||
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
||||
pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
|
||||
kubefeatures "k8s.io/kubernetes/pkg/features"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/checkpoint"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/util/cdi"
|
||||
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||
)
|
||||
|
||||
@@ -244,6 +248,8 @@ func (pdev *podDevices) deviceRunContainerOptions(podUID, contName string) *Devi
|
||||
mountsMap := make(map[string]string)
|
||||
envsMap := make(map[string]string)
|
||||
annotationsMap := make(map[string]string)
|
||||
// Keep track of all CDI devices requested for the container.
|
||||
allCDIDevices := sets.New[string]()
|
||||
// Loops through AllocationResponses of all cached device resources.
|
||||
for _, devices := range resources {
|
||||
resp := devices.allocResp
|
||||
@@ -252,6 +258,7 @@ func (pdev *podDevices) deviceRunContainerOptions(podUID, contName string) *Devi
|
||||
// Mount points
|
||||
// Device files
|
||||
// Container annotations
|
||||
// CDI device IDs
|
||||
// These artifacts are per resource per container.
|
||||
// Updates RunContainerOptions.Envs.
|
||||
for k, v := range resp.Envs {
|
||||
@@ -321,10 +328,78 @@ func (pdev *podDevices) deviceRunContainerOptions(podUID, contName string) *Devi
|
||||
annotationsMap[k] = v
|
||||
opts.Annotations = append(opts.Annotations, kubecontainer.Annotation{Name: k, Value: v})
|
||||
}
|
||||
|
||||
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.DevicePluginCDIDevices) {
|
||||
// Updates for CDI devices.
|
||||
cdiDevices := getCDIDeviceInfo(resp, allCDIDevices)
|
||||
opts.CDIDevices = append(opts.CDIDevices, cdiDevices...)
|
||||
}
|
||||
}
|
||||
|
||||
// Although the CDI devices are expected to be empty when this feature is disabled, we still
|
||||
// guard this with a feature gate to avoid any potential issues.
|
||||
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.DevicePluginCDIDevices) {
|
||||
// We construct a resource ID from the pod UID and container name.
|
||||
// This ID has no semantic meaning, and is only used to ensure that the generated CDI annotation key is unique
|
||||
// for a given container. Since this is only called once per pod-container combination, this should be the case.
|
||||
resourceID := podUID + "-" + contName
|
||||
cdiAnnotations := getCDIAnnotations(resourceID, allCDIDevices, annotationsMap)
|
||||
opts.Annotations = append(opts.Annotations, cdiAnnotations...)
|
||||
}
|
||||
|
||||
return opts
|
||||
}
|
||||
|
||||
// getCDIAnnotations returns the cdi annotations for a given container.
|
||||
// This creates a CDI annotation with a key of the form: devicemanager_{{resourceID}}.
|
||||
// The value of the annotation is a comma separated list of sorted CDI device IDs.
|
||||
// If the annotation key is already defined in the provided annotations map, then the existing value is used.
|
||||
func getCDIAnnotations(resourceID string, cdiDevices sets.Set[string], annotationsMap map[string]string) []kubecontainer.Annotation {
|
||||
// We sort the CDI devices to ensure that the annotation value is deterministic.
|
||||
sortedCDIDevices := sets.List[string](cdiDevices)
|
||||
annotations, err := cdi.GenerateAnnotations(types.UID(resourceID), "devicemanager", sortedCDIDevices)
|
||||
if err != nil {
|
||||
klog.ErrorS(err, "Failed to create CDI annotations")
|
||||
return nil
|
||||
}
|
||||
|
||||
var cdiAnnotations []kubecontainer.Annotation
|
||||
for _, annotation := range annotations {
|
||||
if e, ok := annotationsMap[annotation.Name]; ok {
|
||||
klog.V(4).InfoS("Skip existing annotation", "annotationKey", annotation.Name, "annotationValue", annotation.Value)
|
||||
if e != annotation.Value {
|
||||
klog.ErrorS(nil, "Annotation has conflicting setting", "annotationKey", annotation.Name, "expected", e, "got", annotation.Value)
|
||||
}
|
||||
continue
|
||||
}
|
||||
klog.V(4).InfoS("Add annotation", "annotationKey", annotation.Name, "annotationValue", annotation.Value)
|
||||
annotationsMap[annotation.Name] = annotation.Value
|
||||
cdiAnnotations = append(cdiAnnotations, kubecontainer.Annotation{Name: annotation.Name, Value: annotation.Value})
|
||||
}
|
||||
|
||||
return cdiAnnotations
|
||||
}
|
||||
|
||||
// getCDIDeviceInfo returns CDI devices from an allocate response
|
||||
func getCDIDeviceInfo(resp *pluginapi.ContainerAllocateResponse, knownCDIDevices sets.Set[string]) []kubecontainer.CDIDevice {
|
||||
var cdiDevices []kubecontainer.CDIDevice
|
||||
for _, cdiDevice := range resp.CDIDevices {
|
||||
if knownCDIDevices.Has(cdiDevice.Name) {
|
||||
klog.V(4).InfoS("Skip existing CDI Device", "name", cdiDevice.Name)
|
||||
continue
|
||||
}
|
||||
klog.V(4).InfoS("Add CDI device", "name", cdiDevice.Name)
|
||||
knownCDIDevices.Insert(cdiDevice.Name)
|
||||
|
||||
device := kubecontainer.CDIDevice{
|
||||
Name: cdiDevice.Name,
|
||||
}
|
||||
cdiDevices = append(cdiDevices, device)
|
||||
}
|
||||
|
||||
return cdiDevices
|
||||
}
|
||||
|
||||
// getContainerDevices returns the devices assigned to the provided container for all ResourceNames
|
||||
func (pdev *podDevices) getContainerDevices(podUID, contName string) ResourceDeviceInstances {
|
||||
pdev.RLock()
|
||||
|
||||
@@ -20,11 +20,16 @@ import (
|
||||
"encoding/json"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
||||
featuregatetesting "k8s.io/component-base/featuregate/testing"
|
||||
pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
|
||||
"k8s.io/kubernetes/pkg/features"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/checkpoint"
|
||||
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||
)
|
||||
|
||||
func TestGetContainerDevices(t *testing.T) {
|
||||
@@ -153,3 +158,137 @@ func expectResourceDeviceInstances(t *testing.T, resp ResourceDeviceInstances, e
|
||||
t.Errorf("expected %q got %q", expected, got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeviceRunContainerOptions(t *testing.T) {
|
||||
const (
|
||||
podUID = "pod"
|
||||
containerName = "container"
|
||||
resource1 = "example1.com/resource1"
|
||||
resource2 = "example2.com/resource2"
|
||||
)
|
||||
testCases := []struct {
|
||||
description string
|
||||
gate bool
|
||||
responsesPerResource map[string]*pluginapi.ContainerAllocateResponse
|
||||
expected *DeviceRunContainerOptions
|
||||
}{
|
||||
{
|
||||
description: "empty response",
|
||||
gate: false,
|
||||
responsesPerResource: map[string]*pluginapi.ContainerAllocateResponse{
|
||||
resource1: newContainerAllocateResponse(),
|
||||
},
|
||||
expected: &DeviceRunContainerOptions{},
|
||||
},
|
||||
{
|
||||
description: "cdi devices are ingored when feature gate is disabled",
|
||||
gate: false,
|
||||
responsesPerResource: map[string]*pluginapi.ContainerAllocateResponse{
|
||||
resource1: newContainerAllocateResponse(
|
||||
withDevices(map[string]string{"/dev/r1": "/dev/r1"}),
|
||||
withMounts(map[string]string{"/home/lib1": "/home/lib1"}),
|
||||
withEnvs(map[string]string{"ENV1": "VALUE1"}),
|
||||
withCDIDevices("vendor1.com/class1=device1", "vendor2.com/class2=device2"),
|
||||
),
|
||||
},
|
||||
expected: &DeviceRunContainerOptions{
|
||||
Devices: []kubecontainer.DeviceInfo{
|
||||
{PathOnHost: "/dev/r1", PathInContainer: "/dev/r1", Permissions: "mrw"},
|
||||
},
|
||||
Mounts: []kubecontainer.Mount{
|
||||
{Name: "/home/lib1", HostPath: "/home/lib1", ContainerPath: "/home/lib1", ReadOnly: true},
|
||||
},
|
||||
Envs: []kubecontainer.EnvVar{
|
||||
{Name: "ENV1", Value: "VALUE1"},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "cdi devices are handled when feature gate is enabled",
|
||||
gate: true,
|
||||
responsesPerResource: map[string]*pluginapi.ContainerAllocateResponse{
|
||||
resource1: newContainerAllocateResponse(
|
||||
withCDIDevices("vendor1.com/class1=device1", "vendor2.com/class2=device2"),
|
||||
),
|
||||
},
|
||||
expected: &DeviceRunContainerOptions{
|
||||
Annotations: []kubecontainer.Annotation{
|
||||
{Name: "cdi.k8s.io/devicemanager_pod-container", Value: "vendor1.com/class1=device1,vendor2.com/class2=device2"},
|
||||
},
|
||||
CDIDevices: []kubecontainer.CDIDevice{
|
||||
{Name: "vendor1.com/class1=device1"},
|
||||
{Name: "vendor2.com/class2=device2"},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "cdi devices from multiple resources are handled when feature gate is enabled",
|
||||
gate: true,
|
||||
responsesPerResource: map[string]*pluginapi.ContainerAllocateResponse{
|
||||
resource1: newContainerAllocateResponse(
|
||||
withCDIDevices("vendor1.com/class1=device1", "vendor2.com/class2=device2"),
|
||||
),
|
||||
resource2: newContainerAllocateResponse(
|
||||
withCDIDevices("vendor3.com/class3=device3", "vendor4.com/class4=device4"),
|
||||
),
|
||||
},
|
||||
expected: &DeviceRunContainerOptions{
|
||||
Annotations: []kubecontainer.Annotation{
|
||||
{Name: "cdi.k8s.io/devicemanager_pod-container", Value: "vendor1.com/class1=device1,vendor2.com/class2=device2,vendor3.com/class3=device3,vendor4.com/class4=device4"},
|
||||
},
|
||||
CDIDevices: []kubecontainer.CDIDevice{
|
||||
{Name: "vendor1.com/class1=device1"},
|
||||
{Name: "vendor2.com/class2=device2"},
|
||||
{Name: "vendor3.com/class3=device3"},
|
||||
{Name: "vendor4.com/class4=device4"},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "duplicate cdi devices are skipped",
|
||||
gate: true,
|
||||
responsesPerResource: map[string]*pluginapi.ContainerAllocateResponse{
|
||||
resource1: newContainerAllocateResponse(
|
||||
withCDIDevices("vendor1.com/class1=device1", "vendor2.com/class2=device2"),
|
||||
),
|
||||
resource2: newContainerAllocateResponse(
|
||||
withCDIDevices("vendor2.com/class2=device2", "vendor3.com/class3=device3"),
|
||||
),
|
||||
},
|
||||
expected: &DeviceRunContainerOptions{
|
||||
Annotations: []kubecontainer.Annotation{
|
||||
{Name: "cdi.k8s.io/devicemanager_pod-container", Value: "vendor1.com/class1=device1,vendor2.com/class2=device2,vendor3.com/class3=device3"},
|
||||
},
|
||||
CDIDevices: []kubecontainer.CDIDevice{
|
||||
{Name: "vendor1.com/class1=device1"},
|
||||
{Name: "vendor2.com/class2=device2"},
|
||||
{Name: "vendor3.com/class3=device3"},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
as := assert.New(t)
|
||||
|
||||
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.DevicePluginCDIDevices, tc.gate)()
|
||||
podDevices := newPodDevices()
|
||||
for resourceName, response := range tc.responsesPerResource {
|
||||
podDevices.insert("pod", "container", resourceName,
|
||||
nil,
|
||||
response,
|
||||
)
|
||||
}
|
||||
opts := podDevices.deviceRunContainerOptions(podUID, containerName)
|
||||
|
||||
// The exact ordering of the options depends on the order of the resources in the map.
|
||||
// We therefore use `ElementsMatch` instead of `Equal` on the member slices.
|
||||
as.ElementsMatch(tc.expected.Annotations, opts.Annotations)
|
||||
as.ElementsMatch(tc.expected.CDIDevices, opts.CDIDevices)
|
||||
as.ElementsMatch(tc.expected.Devices, opts.Devices)
|
||||
as.ElementsMatch(tc.expected.Envs, opts.Envs)
|
||||
as.ElementsMatch(tc.expected.Mounts, opts.Mounts)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -91,6 +91,8 @@ type DeviceRunContainerOptions struct {
|
||||
Devices []kubecontainer.DeviceInfo
|
||||
// The Annotations for the container
|
||||
Annotations []kubecontainer.Annotation
|
||||
// CDI Devices for the container
|
||||
CDIDevices []kubecontainer.CDIDevice
|
||||
}
|
||||
|
||||
// TODO: evaluate whether we need this error definition.
|
||||
|
||||
Reference in New Issue
Block a user