Merge pull request #56117 from jiayingz/deviceplugin-addon-config
Automatic merge from submit-queue (batch tested with PRs 56021, 55843, 55088, 56117, 55859). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>. Changes nvidia-gpu device plugin addon config settings: - Runs as system critical pod - Makes resource limits match its resource requests - Modifies test/e2e/scheduling/nvidia-gpus.go to cope with the recent change of running the device plugin as a system addon. - The resource settings of the addon are based on the test results from 8 nvidia-tesla-k80 gpus. **What this PR does / why we need it**: **Which issue(s) this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close the issue(s) when PR gets merged)*: Fixes # **Special notes for your reviewer**: **Release note**: ```release-note ```
This commit is contained in:
@@ -11,7 +11,10 @@ spec:
|
|||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
k8s-app: nvidia-gpu-device-plugin
|
k8s-app: nvidia-gpu-device-plugin
|
||||||
|
annotations:
|
||||||
|
scheduler.alpha.kubernetes.io/critical-pod: ''
|
||||||
spec:
|
spec:
|
||||||
|
priorityClassName: system-node-critical
|
||||||
affinity:
|
affinity:
|
||||||
nodeAffinity:
|
nodeAffinity:
|
||||||
requiredDuringSchedulingIgnoredDuringExecution:
|
requiredDuringSchedulingIgnoredDuringExecution:
|
||||||
@@ -34,7 +37,10 @@ spec:
|
|||||||
name: nvidia-gpu-device-plugin
|
name: nvidia-gpu-device-plugin
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
cpu: 10m
|
cpu: 50m
|
||||||
|
memory: 10Mi
|
||||||
|
limits:
|
||||||
|
cpu: 50m
|
||||||
memory: 10Mi
|
memory: 10Mi
|
||||||
securityContext:
|
securityContext:
|
||||||
privileged: true
|
privileged: true
|
||||||
|
@@ -183,6 +183,11 @@ func testNvidiaGPUsOnCOS(f *framework.Framework) {
|
|||||||
|
|
||||||
pods, err := framework.WaitForControlledPods(f.ClientSet, ds.Namespace, ds.Name, extensionsinternal.Kind("DaemonSet"))
|
pods, err := framework.WaitForControlledPods(f.ClientSet, ds.Namespace, ds.Name, extensionsinternal.Kind("DaemonSet"))
|
||||||
framework.ExpectNoError(err, "getting pods controlled by the daemonset")
|
framework.ExpectNoError(err, "getting pods controlled by the daemonset")
|
||||||
|
devicepluginPods, err := framework.WaitForControlledPods(f.ClientSet, "kube-system", "nvidia-gpu-device-plugin", extensionsinternal.Kind("DaemonSet"))
|
||||||
|
if err == nil {
|
||||||
|
framework.Logf("Adding deviceplugin addon pod.")
|
||||||
|
pods.Items = append(pods.Items, devicepluginPods.Items...)
|
||||||
|
}
|
||||||
framework.Logf("Starting ResourceUsageGather for the created DaemonSet pods.")
|
framework.Logf("Starting ResourceUsageGather for the created DaemonSet pods.")
|
||||||
rsgather, err := framework.NewResourceUsageGatherer(f.ClientSet, framework.ResourceGathererOptions{false, false, 2 * time.Second, 2 * time.Second, true}, pods)
|
rsgather, err := framework.NewResourceUsageGatherer(f.ClientSet, framework.ResourceGathererOptions{false, false, 2 * time.Second, 2 * time.Second, true}, pods)
|
||||||
framework.ExpectNoError(err, "creating ResourceUsageGather for the daemonset pods")
|
framework.ExpectNoError(err, "creating ResourceUsageGather for the daemonset pods")
|
||||||
|
Reference in New Issue
Block a user