diff --git a/controllers/object_controls.go b/controllers/object_controls.go index 64340a8be..5454cd3cf 100644 --- a/controllers/object_controls.go +++ b/controllers/object_controls.go @@ -133,6 +133,8 @@ const ( GDSEnabledEnvName = "GDS_ENABLED" // MOFEDEnabledEnvName is the env name to enable MOFED devices injection with device-plugin MOFEDEnabledEnvName = "MOFED_ENABLED" + // GDRCopyEnabledEnvName is the envvar that enables injection of the GDRCopy device node with the device-plugin + GDRCopyEnabledEnvName = "GDRCOPY_ENABLED" // ServiceMonitorCRDName is the name of the CRD defining the ServiceMonitor kind ServiceMonitorCRDName = "servicemonitors.monitoring.coreos.com" // DefaultToolkitInstallDir is the default toolkit installation directory on the host @@ -1500,6 +1502,10 @@ func TransformDevicePlugin(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpe setContainerEnv(mainContainer, MOFEDEnabledEnvName, "true") } + if config.GDRCopy != nil && config.GDRCopy.IsEnabled() { + setContainerEnv(mainContainer, GDRCopyEnabledEnvName, "true") + } + // apply plugin configuration through ConfigMap if one is provided err = handleDevicePluginConfig(obj, config) if err != nil { diff --git a/controllers/transforms_test.go b/controllers/transforms_test.go index 972e2d173..f9cc1ba9c 100644 --- a/controllers/transforms_test.go +++ b/controllers/transforms_test.go @@ -871,6 +871,44 @@ func TestTransformDevicePlugin(t *testing.T) { }, }).WithContainer(corev1.Container{Name: "dummy"}).WithPullSecret("pull-secret").WithRuntimeClassName("nvidia"), }, + { + description: "transform device plugin, gds and gdrcopy enabled", + ds: NewDaemonset(). + WithContainer(corev1.Container{Name: "nvidia-device-plugin"}), + cpSpec: &gpuv1.ClusterPolicySpec{ + DevicePlugin: gpuv1.DevicePluginSpec{ + Repository: "nvcr.io/nvidia/cloud-native", + Image: "nvidia-device-plugin", + Version: "v1.0.0", + ImagePullPolicy: "IfNotPresent", + }, + Toolkit: gpuv1.ToolkitSpec{ + Enabled: newBoolPtr(true), + InstallDir: "/path/to/install", + }, + GDRCopy: &gpuv1.GDRCopySpec{ + Enabled: newBoolPtr(true), + }, + GPUDirectStorage: &gpuv1.GPUDirectStorageSpec{ + Enabled: newBoolPtr(true), + }, + }, + expectedDs: NewDaemonset().WithContainer(corev1.Container{ + Name: "nvidia-device-plugin", + Image: "nvcr.io/nvidia/cloud-native/nvidia-device-plugin:v1.0.0", + ImagePullPolicy: corev1.PullIfNotPresent, + Env: []corev1.EnvVar{ + {Name: GDSEnabledEnvName, Value: "true"}, + {Name: MOFEDEnabledEnvName, Value: "true"}, + {Name: GDRCopyEnabledEnvName, Value: "true"}, + {Name: "NVIDIA_MIG_MONITOR_DEVICES", Value: "all"}, + {Name: CDIEnabledEnvName, Value: "true"}, + {Name: DeviceListStrategyEnvName, Value: "cdi-annotations,cdi-cri"}, + {Name: CDIAnnotationPrefixEnvName, Value: "cdi.k8s.io/"}, + {Name: NvidiaCDIHookPathEnvName, Value: "/path/to/install/toolkit/nvidia-cdi-hook"}, + }, + }).WithRuntimeClassName("nvidia"), + }, } for _, tc := range testCases {