Skip to content

Commit

Permalink
add new field kernelModuleType to the API Spec
Browse files Browse the repository at this point in the history
Signed-off-by: Tariq Ibrahim <[email protected]>
  • Loading branch information
tariq1890 committed Jan 13, 2025
1 parent c4bbc2f commit 10f8018
Show file tree
Hide file tree
Showing 15 changed files with 128 additions and 29 deletions.
14 changes: 9 additions & 5 deletions api/nvidia/v1/clusterpolicy_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -482,12 +482,20 @@ type DriverSpec struct {
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:booleanSwitch"
UsePrecompiled *bool `json:"usePrecompiled,omitempty"`

// Deprecated: This field is no longer honored by the gpu-operator. Please use KernelModuleType instead.
// UseOpenKernelModules indicates if the open GPU kernel modules should be used
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Enable use of open GPU kernel modules"
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:booleanSwitch"
UseOpenKernelModules *bool `json:"useOpenKernelModules,omitempty"`

// KernelModuleType represents the type of driver kernel modules to be used when installing the GPU driver.
// Accepted values are auto, proprietary and open. NOTE: If auto is chosen, it means that the recommended kernel module
// type is chosen based on the GPU devices on the host and the driver branch used
// +kubebuilder:validation:Enum=auto;open;proprietary
// +kubebuilder:default=auto
KernelModuleType string `json:"kernelModuleType,omitempty"`

// Enabled indicates if deployment of NVIDIA Driver through operator is enabled
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Enable NVIDIA Driver deployment through GPU Operator"
Expand Down Expand Up @@ -1856,11 +1864,7 @@ func (d *DriverSpec) UsePrecompiledDrivers() bool {

// OpenKernelModulesEnabled reports whether the open GPU kernel modules have
// been explicitly requested for the driver install, i.e. whether
// KernelModuleType is set to "open".
//
// The deprecated UseOpenKernelModules field is intentionally not consulted;
// only KernelModuleType decides the result. Any other value (for example
// "auto", "proprietary", or an unset field) yields false.
func (d *DriverSpec) OpenKernelModulesEnabled() bool {
	return d.KernelModuleType == "open"
}

// IsEnabled returns true if device-plugin is enabled(default) through gpu-operator
Expand Down
13 changes: 9 additions & 4 deletions api/nvidia/v1alpha1/nvidiadriver_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,20 @@ type NVIDIADriverSpec struct {
// +kubebuilder:validation:XValidation:rule="self == oldSelf",message="usePrecompiled is an immutable field. Please create a new NvidiaDriver resource instead when you want to change this setting."
UsePrecompiled *bool `json:"usePrecompiled,omitempty"`

// Deprecated: This field is no longer honored by the gpu-operator. Please use KernelModuleType instead.
// UseOpenKernelModules indicates if the open GPU kernel modules should be used
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Enable use of open GPU kernel modules"
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:booleanSwitch"
UseOpenKernelModules *bool `json:"useOpenKernelModules,omitempty"`

// KernelModuleType represents the type of driver kernel modules to be used when installing the GPU driver.
// Accepted values are auto, proprietary and open. NOTE: If auto is chosen, it means that the recommended kernel module
// type is chosen based on the GPU devices on the host and the driver branch used
// +kubebuilder:validation:Enum=auto;open;proprietary
// +kubebuilder:default=auto
KernelModuleType string `json:"kernelModuleType,omitempty"`

// NVIDIA Driver container startup probe settings
StartupProbe *ContainerProbeSpec `json:"startupProbe,omitempty"`

Expand Down Expand Up @@ -642,10 +650,7 @@ func (d *NVIDIADriverSpec) IsGDRCopyEnabled() bool {

// IsOpenKernelModulesEnabled reports whether the NVIDIA open (OpenRM) kernel
// modules have been explicitly requested, i.e. whether KernelModuleType is
// set to "open".
//
// The deprecated UseOpenKernelModules field is intentionally not consulted;
// only KernelModuleType decides the result. Any other value (for example
// "auto", "proprietary", or an unset field) yields false.
func (d *NVIDIADriverSpec) IsOpenKernelModulesEnabled() bool {
	return d.KernelModuleType == "open"
}

// IsOpenKernelModulesRequired returns true if NVIDIA OpenRM drivers required in this configuration
Expand Down
16 changes: 14 additions & 2 deletions bundle/manifests/nvidia.com_clusterpolicies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -650,6 +650,17 @@ spec:
name:
type: string
type: object
kernelModuleType:
default: auto
description: |-
KernelModuleType represents the type of driver kernel modules to be used when installing the GPU driver.
Accepted values are auto, proprietary and open. NOTE: If auto is chosen, it means that the recommended kernel module
type is chosen based on the GPU devices on the host and the driver branch used
enum:
- auto
- open
- proprietary
type: string
licensingConfig:
description: 'Optional: Licensing configuration for NVIDIA vGPU
licensing'
Expand Down Expand Up @@ -978,8 +989,9 @@ spec:
NVIDIA Driver is managed by the NVIDIADriver CRD type
type: boolean
useOpenKernelModules:
description: UseOpenKernelModules indicates if the open GPU kernel
modules should be used
description: |-
Deprecated: This field is no longer honored by the gpu-operator. Please use KernelModuleType instead.
UseOpenKernelModules indicates if the open GPU kernel modules should be used
type: boolean
usePrecompiled:
description: UsePrecompiled indicates if deployment of NVIDIA
Expand Down
16 changes: 14 additions & 2 deletions bundle/manifests/nvidia.com_nvidiadrivers.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,17 @@ spec:
name:
type: string
type: object
kernelModuleType:
default: auto
description: |-
KernelModuleType represents the type of driver kernel modules to be used when installing the GPU driver.
Accepted values are auto, proprietary and open. NOTE: If auto is chosen, it means that the recommended kernel module
type is chosen based on the GPU devices on the host and the driver branch used
enum:
- auto
- open
- proprietary
type: string
labels:
additionalProperties:
type: string
Expand Down Expand Up @@ -684,8 +695,9 @@ spec:
type: object
type: array
useOpenKernelModules:
description: UseOpenKernelModules indicates if the open GPU kernel
modules should be used
description: |-
Deprecated: This field is no longer honored by the gpu-operator. Please use KernelModuleType instead.
UseOpenKernelModules indicates if the open GPU kernel modules should be used
type: boolean
usePrecompiled:
description: UsePrecompiled indicates if deployment of NVIDIA Driver
Expand Down
16 changes: 14 additions & 2 deletions config/crd/bases/nvidia.com_clusterpolicies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -650,6 +650,17 @@ spec:
name:
type: string
type: object
kernelModuleType:
default: auto
description: |-
KernelModuleType represents the type of driver kernel modules to be used when installing the GPU driver.
Accepted values are auto, proprietary and open. NOTE: If auto is chosen, it means that the recommended kernel module
type is chosen based on the GPU devices on the host and the driver branch used
enum:
- auto
- open
- proprietary
type: string
licensingConfig:
description: 'Optional: Licensing configuration for NVIDIA vGPU
licensing'
Expand Down Expand Up @@ -978,8 +989,9 @@ spec:
NVIDIA Driver is managed by the NVIDIADriver CRD type
type: boolean
useOpenKernelModules:
description: UseOpenKernelModules indicates if the open GPU kernel
modules should be used
description: |-
Deprecated: This field is no longer honored by the gpu-operator. Please use KernelModuleType instead.
UseOpenKernelModules indicates if the open GPU kernel modules should be used
type: boolean
usePrecompiled:
description: UsePrecompiled indicates if deployment of NVIDIA
Expand Down
16 changes: 14 additions & 2 deletions config/crd/bases/nvidia.com_nvidiadrivers.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,17 @@ spec:
name:
type: string
type: object
kernelModuleType:
default: auto
description: |-
KernelModuleType represents the type of driver kernel modules to be used when installing the GPU driver.
Accepted values are auto, proprietary and open. NOTE: If auto is chosen, it means that the recommended kernel module
type is chosen based on the GPU devices on the host and the driver branch used
enum:
- auto
- open
- proprietary
type: string
labels:
additionalProperties:
type: string
Expand Down Expand Up @@ -684,8 +695,9 @@ spec:
type: object
type: array
useOpenKernelModules:
description: UseOpenKernelModules indicates if the open GPU kernel
modules should be used
description: |-
Deprecated: This field is no longer honored by the gpu-operator. Please use KernelModuleType instead.
UseOpenKernelModules indicates if the open GPU kernel modules should be used
type: boolean
usePrecompiled:
description: UsePrecompiled indicates if deployment of NVIDIA Driver
Expand Down
11 changes: 9 additions & 2 deletions controllers/object_controls.go
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,8 @@ const (
DefaultCCModeEnvName = "DEFAULT_CC_MODE"
// OpenKernelModulesEnabledEnvName is the name of the driver-container envvar for enabling open GPU kernel module support
OpenKernelModulesEnabledEnvName = "OPEN_KERNEL_MODULES_ENABLED"
// KernelModuleTypeEnvName is the name of the driver-container envvar to set the desired kernel module type
KernelModuleTypeEnvName = "KERNEL_MODULE_TYPE"
// MPSRootEnvName is the name of the envvar for configuring the MPS root
MPSRootEnvName = "MPS_ROOT"
// DefaultMPSRoot is the default MPS root path on the host
Expand Down Expand Up @@ -3166,8 +3168,13 @@ func transformDriverContainer(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicy
setContainerEnv(driverContainer, env.Name, env.Value)
}
}
if config.Driver.OpenKernelModulesEnabled() {
setContainerEnv(driverContainer, OpenKernelModulesEnabledEnvName, "true")

if len(config.Driver.KernelModuleType) > 0 {
setContainerEnv(driverContainer, KernelModuleTypeEnvName, config.Driver.KernelModuleType)
// we set the "OPEN_KERNEL_MODULES_ENABLED" envvar for backwards compatibility with older driver containers
if config.Driver.OpenKernelModulesEnabled() {
setContainerEnv(driverContainer, OpenKernelModulesEnabledEnvName, "true")
}
}

// set container probe timeouts
Expand Down
16 changes: 14 additions & 2 deletions deployments/gpu-operator/crds/nvidia.com_clusterpolicies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -650,6 +650,17 @@ spec:
name:
type: string
type: object
kernelModuleType:
default: auto
description: |-
KernelModuleType represents the type of driver kernel modules to be used when installing the GPU driver.
Accepted values are auto, proprietary and open. NOTE: If auto is chosen, it means that the recommended kernel module
type is chosen based on the GPU devices on the host and the driver branch used
enum:
- auto
- open
- proprietary
type: string
licensingConfig:
description: 'Optional: Licensing configuration for NVIDIA vGPU
licensing'
Expand Down Expand Up @@ -978,8 +989,9 @@ spec:
NVIDIA Driver is managed by the NVIDIADriver CRD type
type: boolean
useOpenKernelModules:
description: UseOpenKernelModules indicates if the open GPU kernel
modules should be used
description: |-
Deprecated: This field is no longer honored by the gpu-operator. Please use KernelModuleType instead.
UseOpenKernelModules indicates if the open GPU kernel modules should be used
type: boolean
usePrecompiled:
description: UsePrecompiled indicates if deployment of NVIDIA
Expand Down
16 changes: 14 additions & 2 deletions deployments/gpu-operator/crds/nvidia.com_nvidiadrivers.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,17 @@ spec:
name:
type: string
type: object
kernelModuleType:
default: auto
description: |-
KernelModuleType represents the type of driver kernel modules to be used when installing the GPU driver.
Accepted values are auto, proprietary and open. NOTE: If auto is chosen, it means that the recommended kernel module
type is chosen based on the GPU devices on the host and the driver branch used
enum:
- auto
- open
- proprietary
type: string
labels:
additionalProperties:
type: string
Expand Down Expand Up @@ -684,8 +695,9 @@ spec:
type: object
type: array
useOpenKernelModules:
description: UseOpenKernelModules indicates if the open GPU kernel
modules should be used
description: |-
Deprecated: This field is no longer honored by the gpu-operator. Please use KernelModuleType instead.
UseOpenKernelModules indicates if the open GPU kernel modules should be used
type: boolean
usePrecompiled:
description: UsePrecompiled indicates if deployment of NVIDIA Driver
Expand Down
2 changes: 1 addition & 1 deletion deployments/gpu-operator/templates/clusterpolicy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ spec:
driver:
enabled: {{ .Values.driver.enabled }}
useNvidiaDriverCRD: {{ .Values.driver.nvidiaDriverCRD.enabled }}
useOpenKernelModules: {{ .Values.driver.useOpenKernelModules }}
kernelModuleType: {{ .Values.driver.kernelModuleType }}
usePrecompiled: {{ .Values.driver.usePrecompiled }}
{{- if .Values.driver.repository }}
repository: {{ .Values.driver.repository }}
Expand Down
2 changes: 1 addition & 1 deletion deployments/gpu-operator/templates/nvidiadriver.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ spec:
repository: {{ .Values.driver.repository }}
image: {{ .Values.driver.image }}
version: {{ .Values.driver.version }}
useOpenKernelModules: {{ .Values.driver.useOpenKernelModules }}
kernelModuleType: {{ .Values.driver.kernelModuleType }}
usePrecompiled: {{ .Values.driver.usePrecompiled }}
driverType: {{ .Values.driver.nvidiaDriverCRD.driverType | default "gpu" }}
{{- if .Values.daemonsets.annotations }}
Expand Down
6 changes: 5 additions & 1 deletion deployments/gpu-operator/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,11 @@ driver:
deployDefaultCR: true
driverType: gpu
nodeSelector: {}
useOpenKernelModules: false
kernelModuleType: "auto"

# NOTE: useOpenKernelModules has been deprecated and is now a no-op. Please use kernelModuleType instead.
# useOpenKernelModules: false

# use pre-compiled packages for NVIDIA driver installation.
# only supported as a tech-preview feature on ubuntu22.04 kernels.
usePrecompiled: false
Expand Down
4 changes: 2 additions & 2 deletions internal/state/driver_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -221,8 +221,8 @@ func TestDriverSpec(t *testing.T) {
Effect: "NoSchedule",
},
},
PriorityClassName: "custom-priority-class-name",
UseOpenKernelModules: utils.BoolPtr(true),
PriorityClassName: "custom-priority-class-name",
KernelModuleType: "open",
}

driverSpec.Labels = sanitizeDriverLabels(driverSpec.Labels)
Expand Down
2 changes: 2 additions & 0 deletions internal/state/testdata/golden/driver-full-spec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,8 @@ spec:
valueFrom:
fieldRef:
fieldPath: status.hostIP
- name: KERNEL_MODULE_TYPE
value: open
- name: OPEN_KERNEL_MODULES_ENABLED
value: "true"
- name: FOO
Expand Down
7 changes: 6 additions & 1 deletion manifests/state-driver/0500_daemonset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -213,9 +213,14 @@ spec:
valueFrom:
fieldRef:
fieldPath: status.hostIP
{{- if deref .Driver.Spec.UseOpenKernelModules }}
{{- if .Driver.Spec.KernelModuleType }}
- name: KERNEL_MODULE_TYPE
value: {{ .Driver.Spec.KernelModuleType }}
# we set this env var for backwards compatibility with older driver versions
{{- if eq .Driver.Spec.KernelModuleType "open"}}
- name: OPEN_KERNEL_MODULES_ENABLED
value: "true"
{{- end }}
{{- end }}
{{- if and (.Openshift) (.Runtime.OpenshiftVersion) }}
- name: OPENSHIFT_VERSION
Expand Down

0 comments on commit 10f8018

Please sign in to comment.