Add CDI devices to device plugin API

This change adds CDI device IDs to the ContainerAllocateResponse in the
device plugin API. This allows a device plugin to specify CDI devices
by their unique fully-qualified CDI device names using the related field
in the CRI specification.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
Evan Lezar
2023-05-16 15:12:32 +02:00
parent cd14e97ea8
commit b57c7e2fe4
8 changed files with 590 additions and 77 deletions

View File

@@ -221,6 +221,13 @@ const (
// (e.g. in a Deployment), which is the historical default.
DefaultHostNetworkHostPortsInPodTemplates featuregate.Feature = "DefaultHostNetworkHostPortsInPodTemplates"
// owner: @elezar
// kep: http://kep.k8s.io/4009
// alpha: v1.28
//
// Add support for CDI Device IDs in the Device Plugin API.
DevicePluginCDIDevices featuregate.Feature = "DevicePluginCDIDevices"
// owner: @andrewsykim
// alpha: v1.22
//
@@ -908,6 +915,8 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS
DisableKubeletCloudCredentialProviders: {Default: false, PreRelease: featuregate.Alpha},
DevicePluginCDIDevices: {Default: false, PreRelease: featuregate.Alpha},
DownwardAPIHugePages: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in v1.29
DynamicResourceAllocation: {Default: false, PreRelease: featuregate.Alpha},

View File

@@ -673,6 +673,7 @@ func (cm *containerManagerImpl) GetResources(pod *v1.Pod, container *v1.Containe
opts.Mounts = append(opts.Mounts, devOpts.Mounts...)
opts.Envs = append(opts.Envs, devOpts.Envs...)
opts.Annotations = append(opts.Annotations, devOpts.Annotations...)
opts.CDIDevices = append(opts.CDIDevices, devOpts.CDIDevices...)
return opts, nil
}

View File

@@ -573,9 +573,10 @@ func constructDevices(devices []string) checkpoint.DevicesPerNUMA {
// containerAllocateResponseBuilder is a helper to build a ContainerAllocateResponse.
// Its fields hold the values supplied through the with* functional options.
type containerAllocateResponseBuilder struct {
devices map[string]string
mounts map[string]string
envs map[string]string
devices map[string]string
mounts map[string]string
envs map[string]string
// cdiDevices lists the CDI device names that Build wraps into pluginapi.CDIDevice entries.
cdiDevices []string
}
// containerAllocateResponseBuilderOption defines a functional option for a containerAllocateResponseBuilder
@@ -602,6 +603,13 @@ func withEnvs(envs map[string]string) containerAllocateResponseBuilderOption {
}
}
// withCDIDevices returns an option that records the given CDI device names on
// the containerAllocateResponseBuilder, replacing any names set previously.
func withCDIDevices(cdiDevices ...string) containerAllocateResponseBuilderOption {
	setCDIDevices := func(builder *containerAllocateResponseBuilder) {
		builder.cdiDevices = cdiDevices
	}
	return setCDIDevices
}
// newContainerAllocateResponse creates a ContainerAllocateResponse with the given options.
func newContainerAllocateResponse(opts ...containerAllocateResponseBuilderOption) *pluginapi.ContainerAllocateResponse {
b := &containerAllocateResponseBuilder{}
@@ -633,6 +641,16 @@ func (b *containerAllocateResponseBuilder) Build() *pluginapi.ContainerAllocateR
for k, v := range b.envs {
resp.Envs[k] = v
}
var cdiDevices []*pluginapi.CDIDevice
for _, dev := range b.cdiDevices {
cdiDevice := pluginapi.CDIDevice{
Name: dev,
}
cdiDevices = append(cdiDevices, &cdiDevice)
}
resp.CDIDevices = cdiDevices
return resp
}
@@ -660,6 +678,7 @@ func TestCheckpoint(t *testing.T) {
newContainerAllocateResponse(
withDevices(map[string]string{"/dev/r1dev1": "/dev/r1dev1", "/dev/r1dev2": "/dev/r1dev2"}),
withMounts(map[string]string{"/home/r1lib1": "/usr/r1lib1"}),
withCDIDevices("domain1.com/resource1=dev1", "domain1.com/resource1=dev2"),
),
)
testManager.podDevices.insert("pod1", "con1", resourceName2,

View File

@@ -21,9 +21,13 @@ import (
"k8s.io/klog/v2"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/sets"
utilfeature "k8s.io/apiserver/pkg/util/feature"
pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
kubefeatures "k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/checkpoint"
"k8s.io/kubernetes/pkg/kubelet/cm/util/cdi"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
)
@@ -244,6 +248,8 @@ func (pdev *podDevices) deviceRunContainerOptions(podUID, contName string) *Devi
mountsMap := make(map[string]string)
envsMap := make(map[string]string)
annotationsMap := make(map[string]string)
// Keep track of all CDI devices requested for the container.
allCDIDevices := sets.New[string]()
// Loops through AllocationResponses of all cached device resources.
for _, devices := range resources {
resp := devices.allocResp
@@ -252,6 +258,7 @@ func (pdev *podDevices) deviceRunContainerOptions(podUID, contName string) *Devi
// Mount points
// Device files
// Container annotations
// CDI device IDs
// These artifacts are per resource per container.
// Updates RunContainerOptions.Envs.
for k, v := range resp.Envs {
@@ -321,10 +328,78 @@ func (pdev *podDevices) deviceRunContainerOptions(podUID, contName string) *Devi
annotationsMap[k] = v
opts.Annotations = append(opts.Annotations, kubecontainer.Annotation{Name: k, Value: v})
}
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.DevicePluginCDIDevices) {
// Updates for CDI devices.
cdiDevices := getCDIDeviceInfo(resp, allCDIDevices)
opts.CDIDevices = append(opts.CDIDevices, cdiDevices...)
}
}
// Although the CDI devices are expected to be empty when this feature is disabled, we still
// guard this with a feature gate to avoid any potential issues.
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.DevicePluginCDIDevices) {
// We construct a resource ID from the pod UID and container name.
// This ID has no semantic meaning, and is only used to ensure that the generated CDI annotation key is unique
// for a given container. Since this is only called once per pod-container combination, this should be the case.
resourceID := podUID + "-" + contName
cdiAnnotations := getCDIAnnotations(resourceID, allCDIDevices, annotationsMap)
opts.Annotations = append(opts.Annotations, cdiAnnotations...)
}
return opts
}
// getCDIAnnotations builds the CDI annotations that should be added for a container.
// The annotations are produced by cdi.GenerateAnnotations from resourceID, the
// "devicemanager" driver name and the sorted list of CDI device IDs.
// An annotation whose key is already present in annotationsMap is not returned: the
// existing entry is left untouched and a differing value is logged as an error.
// New annotations are recorded in annotationsMap and returned.
// If generating the annotations fails, the error is logged and nil is returned.
func getCDIAnnotations(resourceID string, cdiDevices sets.Set[string], annotationsMap map[string]string) []kubecontainer.Annotation {
	// A sorted device list keeps the generated annotation value deterministic.
	deviceNames := sets.List[string](cdiDevices)

	generated, err := cdi.GenerateAnnotations(types.UID(resourceID), "devicemanager", deviceNames)
	if err != nil {
		klog.ErrorS(err, "Failed to create CDI annotations")
		return nil
	}

	var added []kubecontainer.Annotation
	for _, candidate := range generated {
		existing, found := annotationsMap[candidate.Name]
		if !found {
			klog.V(4).InfoS("Add annotation", "annotationKey", candidate.Name, "annotationValue", candidate.Value)
			annotationsMap[candidate.Name] = candidate.Value
			added = append(added, kubecontainer.Annotation{Name: candidate.Name, Value: candidate.Value})
			continue
		}

		// The key is already taken: keep the existing value and only report a mismatch.
		klog.V(4).InfoS("Skip existing annotation", "annotationKey", candidate.Name, "annotationValue", candidate.Value)
		if existing != candidate.Value {
			klog.ErrorS(nil, "Annotation has conflicting setting", "annotationKey", candidate.Name, "expected", existing, "got", candidate.Value)
		}
	}
	return added
}
// getCDIDeviceInfo converts the CDI devices of an allocate response into
// kubecontainer.CDIDevice values. Device names already present in
// knownCDIDevices are skipped; every newly seen name is inserted into
// knownCDIDevices so that later calls sharing the same set do not return it again.
func getCDIDeviceInfo(resp *pluginapi.ContainerAllocateResponse, knownCDIDevices sets.Set[string]) []kubecontainer.CDIDevice {
	var added []kubecontainer.CDIDevice
	for _, dev := range resp.CDIDevices {
		if !knownCDIDevices.Has(dev.Name) {
			klog.V(4).InfoS("Add CDI device", "name", dev.Name)
			knownCDIDevices.Insert(dev.Name)
			added = append(added, kubecontainer.CDIDevice{Name: dev.Name})
			continue
		}
		klog.V(4).InfoS("Skip existing CDI Device", "name", dev.Name)
	}
	return added
}
// getContainerDevices returns the devices assigned to the provided container for all ResourceNames
func (pdev *podDevices) getContainerDevices(podUID, contName string) ResourceDeviceInstances {
pdev.RLock()

View File

@@ -20,11 +20,16 @@ import (
"encoding/json"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"k8s.io/apimachinery/pkg/util/sets"
utilfeature "k8s.io/apiserver/pkg/util/feature"
featuregatetesting "k8s.io/component-base/featuregate/testing"
pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/checkpoint"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
)
func TestGetContainerDevices(t *testing.T) {
@@ -153,3 +158,137 @@ func expectResourceDeviceInstances(t *testing.T, resp ResourceDeviceInstances, e
t.Errorf("expected %q got %q", expected, got)
}
}
// TestDeviceRunContainerOptions checks the options returned by
// podDevices.deviceRunContainerOptions for a single pod/container, with the
// DevicePluginCDIDevices feature gate both disabled and enabled. Each case
// inserts one allocate response per resource and compares the resulting
// annotations, CDI devices, devices, envs and mounts against the expectation.
func TestDeviceRunContainerOptions(t *testing.T) {
	const (
		podUID        = "pod"
		containerName = "container"
		resource1     = "example1.com/resource1"
		resource2     = "example2.com/resource2"
	)
	testCases := []struct {
		description          string
		gate                 bool
		responsesPerResource map[string]*pluginapi.ContainerAllocateResponse
		expected             *DeviceRunContainerOptions
	}{
		{
			description: "empty response",
			gate:        false,
			responsesPerResource: map[string]*pluginapi.ContainerAllocateResponse{
				resource1: newContainerAllocateResponse(),
			},
			expected: &DeviceRunContainerOptions{},
		},
		{
			description: "cdi devices are ignored when feature gate is disabled",
			gate:        false,
			responsesPerResource: map[string]*pluginapi.ContainerAllocateResponse{
				resource1: newContainerAllocateResponse(
					withDevices(map[string]string{"/dev/r1": "/dev/r1"}),
					withMounts(map[string]string{"/home/lib1": "/home/lib1"}),
					withEnvs(map[string]string{"ENV1": "VALUE1"}),
					withCDIDevices("vendor1.com/class1=device1", "vendor2.com/class2=device2"),
				),
			},
			expected: &DeviceRunContainerOptions{
				Devices: []kubecontainer.DeviceInfo{
					{PathOnHost: "/dev/r1", PathInContainer: "/dev/r1", Permissions: "mrw"},
				},
				Mounts: []kubecontainer.Mount{
					{Name: "/home/lib1", HostPath: "/home/lib1", ContainerPath: "/home/lib1", ReadOnly: true},
				},
				Envs: []kubecontainer.EnvVar{
					{Name: "ENV1", Value: "VALUE1"},
				},
			},
		},
		{
			description: "cdi devices are handled when feature gate is enabled",
			gate:        true,
			responsesPerResource: map[string]*pluginapi.ContainerAllocateResponse{
				resource1: newContainerAllocateResponse(
					withCDIDevices("vendor1.com/class1=device1", "vendor2.com/class2=device2"),
				),
			},
			expected: &DeviceRunContainerOptions{
				Annotations: []kubecontainer.Annotation{
					{Name: "cdi.k8s.io/devicemanager_pod-container", Value: "vendor1.com/class1=device1,vendor2.com/class2=device2"},
				},
				CDIDevices: []kubecontainer.CDIDevice{
					{Name: "vendor1.com/class1=device1"},
					{Name: "vendor2.com/class2=device2"},
				},
			},
		},
		{
			description: "cdi devices from multiple resources are handled when feature gate is enabled",
			gate:        true,
			responsesPerResource: map[string]*pluginapi.ContainerAllocateResponse{
				resource1: newContainerAllocateResponse(
					withCDIDevices("vendor1.com/class1=device1", "vendor2.com/class2=device2"),
				),
				resource2: newContainerAllocateResponse(
					withCDIDevices("vendor3.com/class3=device3", "vendor4.com/class4=device4"),
				),
			},
			expected: &DeviceRunContainerOptions{
				Annotations: []kubecontainer.Annotation{
					{Name: "cdi.k8s.io/devicemanager_pod-container", Value: "vendor1.com/class1=device1,vendor2.com/class2=device2,vendor3.com/class3=device3,vendor4.com/class4=device4"},
				},
				CDIDevices: []kubecontainer.CDIDevice{
					{Name: "vendor1.com/class1=device1"},
					{Name: "vendor2.com/class2=device2"},
					{Name: "vendor3.com/class3=device3"},
					{Name: "vendor4.com/class4=device4"},
				},
			},
		},
		{
			description: "duplicate cdi devices are skipped",
			gate:        true,
			responsesPerResource: map[string]*pluginapi.ContainerAllocateResponse{
				resource1: newContainerAllocateResponse(
					withCDIDevices("vendor1.com/class1=device1", "vendor2.com/class2=device2"),
				),
				resource2: newContainerAllocateResponse(
					withCDIDevices("vendor2.com/class2=device2", "vendor3.com/class3=device3"),
				),
			},
			expected: &DeviceRunContainerOptions{
				Annotations: []kubecontainer.Annotation{
					{Name: "cdi.k8s.io/devicemanager_pod-container", Value: "vendor1.com/class1=device1,vendor2.com/class2=device2,vendor3.com/class3=device3"},
				},
				CDIDevices: []kubecontainer.CDIDevice{
					{Name: "vendor1.com/class1=device1"},
					{Name: "vendor2.com/class2=device2"},
					{Name: "vendor3.com/class3=device3"},
				},
			},
		},
	}

	for _, tc := range testCases {
		t.Run(tc.description, func(t *testing.T) {
			as := assert.New(t)
			defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.DevicePluginCDIDevices, tc.gate)()

			// Insert and query with the same constants so that the pod/container
			// used for setup can never drift from the one being looked up.
			pdev := newPodDevices()
			for resourceName, response := range tc.responsesPerResource {
				pdev.insert(podUID, containerName, resourceName,
					nil,
					response,
				)
			}
			opts := pdev.deviceRunContainerOptions(podUID, containerName)

			// The exact ordering of the options depends on the order of the resources in the map.
			// We therefore use `ElementsMatch` instead of `Equal` on the member slices.
			as.ElementsMatch(tc.expected.Annotations, opts.Annotations)
			as.ElementsMatch(tc.expected.CDIDevices, opts.CDIDevices)
			as.ElementsMatch(tc.expected.Devices, opts.Devices)
			as.ElementsMatch(tc.expected.Envs, opts.Envs)
			as.ElementsMatch(tc.expected.Mounts, opts.Mounts)
		})
	}
}

View File

@@ -91,6 +91,8 @@ type DeviceRunContainerOptions struct {
Devices []kubecontainer.DeviceInfo
// The Annotations for the container
Annotations []kubecontainer.Annotation
// CDI Devices for the container
CDIDevices []kubecontainer.CDIDevice
}
// TODO: evaluate whether we need this error definition.