Documentation
¶
Index ¶
- Constants
- Variables
- func ClusterRole(n ClusterPolicyController) (gpuv1.State, error)
- func ClusterRoleBinding(n ClusterPolicyController) (gpuv1.State, error)
- func ConfigMaps(n ClusterPolicyController) (gpuv1.State, error)
- func DaemonSet(n ClusterPolicyController) (gpuv1.State, error)
- func Deployment(n ClusterPolicyController) (gpuv1.State, error)
- func GetClusterWideProxy(ctx context.Context) (*apiconfigv1.Proxy, error)
- func KubernetesVersion() (string, error)
- func OpenshiftVersion(ctx context.Context) (string, error)
- func PrometheusRule(n ClusterPolicyController) (gpuv1.State, error)
- func Role(n ClusterPolicyController) (gpuv1.State, error)
- func RoleBinding(n ClusterPolicyController) (gpuv1.State, error)
- func RuntimeClasses(n ClusterPolicyController) (gpuv1.State, error)
- func SecurityContextConstraints(n ClusterPolicyController) (gpuv1.State, error)
- func Service(n ClusterPolicyController) (gpuv1.State, error)
- func ServiceAccount(n ClusterPolicyController) (gpuv1.State, error)
- func ServiceMonitor(n ClusterPolicyController) (gpuv1.State, error)
- func TransformCCManager(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, ...) error
- func TransformDCGM(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, ...) error
- func TransformDCGMExporter(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, ...) error
- func TransformDCGMExporterService(obj *corev1.Service, config *gpuv1.ClusterPolicySpec) error
- func TransformDevicePlugin(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, ...) error
- func TransformDriver(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, ...) error
- func TransformGPUDiscoveryPlugin(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, ...) error
- func TransformKataManager(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, ...) error
- func TransformMIGManager(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, ...) error
- func TransformMPSControlDaemon(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, ...) error
- func TransformNodeStatusExporter(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, ...) error
- func TransformSandboxDevicePlugin(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, ...) error
- func TransformSandboxValidator(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, ...) error
- func TransformToolkit(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, ...) error
- func TransformVFIOManager(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, ...) error
- func TransformVGPUDeviceManager(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, ...) error
- func TransformVGPUManager(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, ...) error
- func TransformValidator(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, ...) error
- func TransformValidatorComponent(config *gpuv1.ClusterPolicySpec, podSpec *corev1.PodSpec, component string) error
- func TransformValidatorShared(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec) error
- type ClusterPolicyController
- type ClusterPolicyReconciler
- type ContainerProbe
- type MountPathToVolumeSource
- type NVIDIADriverReconciler
- type OpenShiftDriverToolkit
- type OperatorMetrics
- type Resources
- type UpgradeReconciler
Constants ¶
const ( // DefaultContainerdConfigFile indicates default config file path for containerd DefaultContainerdConfigFile = "/etc/containerd/config.toml" // DefaultContainerdDropInConfigFile indicates default drop-in config file path for containerd DefaultContainerdDropInConfigFile = "/etc/containerd/conf.d/99-nvidia.toml" // DefaultContainerdSocketFile indicates default containerd socket file DefaultContainerdSocketFile = "/run/containerd/containerd.sock" // DefaultDockerConfigFile indicates default config file path for docker DefaultDockerConfigFile = "/etc/docker/daemon.json" // DefaultDockerSocketFile indicates default docker socket file DefaultDockerSocketFile = "/var/run/docker.sock" // DefaultRuntimeNRISocketFile indicates the default container runtime NRI socket file DefaultRuntimeNRISocketFile = "/var/run/nri/nri.sock" // DefaultCRIOConfigFile indicates default config file path for cri-o. . DefaultCRIOConfigFile = "/etc/crio/config.toml" // DefaultCRIODropInConfigFile indicates the default path to the drop-in config file for cri-o DefaultCRIODropInConfigFile = "/etc/crio/crio.conf.d/99-nvidia.conf" // TrustedCAConfigMapName indicates configmap with custom user CA injected TrustedCAConfigMapName = "gpu-operator-trusted-ca" // TrustedCABundleFileName indicates custom user ca certificate filename TrustedCABundleFileName = "ca-bundle.crt" // TrustedCABundleMountDir indicates target mount directory of user ca bundle TrustedCABundleMountDir = "/etc/pki/ca-trust/extracted/pem" // TrustedCACertificate indicates injected CA certificate name TrustedCACertificate = "tls-ca-bundle.pem" // DefaultRuntimeClass represents "nvidia" RuntimeClass DefaultRuntimeClass = "nvidia" // DriverInstallPathVolName represents volume name for driver install path provided to toolkit DriverInstallPathVolName = "driver-install-path" // DefaultRuntimeNRISocketTargetDir represents target directory where runtime NRI socket directory will be mounted DefaultRuntimeNRISocketTargetDir = 
"/runtime/nri-sock-dir/" // DefaultRuntimeSocketTargetDir represents target directory where runtime socket directory will be mounted DefaultRuntimeSocketTargetDir = "/runtime/sock-dir/" // DefaultRuntimeConfigTargetDir represents target directory where runtime socket directory will be mounted DefaultRuntimeConfigTargetDir = "/runtime/config-dir/" // DefaultRuntimeDropInConfigTargetDir represents target directory where drop-in config directory will be mounted DefaultRuntimeDropInConfigTargetDir = "/runtime/config-dir.d/" // ValidatorImageEnvName indicates env name for validator image passed ValidatorImageEnvName = "VALIDATOR_IMAGE" // ValidatorImagePullPolicyEnvName indicates env name for validator image pull policy passed ValidatorImagePullPolicyEnvName = "VALIDATOR_IMAGE_PULL_POLICY" // ValidatorImagePullSecretsEnvName indicates env name for validator image pull secrets passed ValidatorImagePullSecretsEnvName = "VALIDATOR_IMAGE_PULL_SECRETS" // ValidatorRuntimeClassEnvName indicates env name of runtime class to be applied to validator pods ValidatorRuntimeClassEnvName = "VALIDATOR_RUNTIME_CLASS" // MigStrategyEnvName indicates env name for passing MIG strategy MigStrategyEnvName = "MIG_STRATEGY" // MigPartedDefaultConfigMapName indicates name of ConfigMap containing default mig-parted config MigPartedDefaultConfigMapName = "default-mig-parted-config" // MigDefaultGPUClientsConfigMapName indicates name of ConfigMap containing default gpu-clients MigDefaultGPUClientsConfigMapName = "default-gpu-clients" // DCGMRemoteEngineEnvName indicates env name to specify remote DCGM host engine ip:port DCGMRemoteEngineEnvName = "DCGM_REMOTE_HOSTENGINE_INFO" // DCGMDefaultPort indicates default port bound to DCGM host engine DCGMDefaultPort = 5555 // GPUDirectRDMAEnabledEnvName indicates if GPU direct RDMA is enabled through GPU operator GPUDirectRDMAEnabledEnvName = "GPU_DIRECT_RDMA_ENABLED" // UseHostMOFEDEnvName indicates if MOFED driver is pre-installed on the host 
UseHostMOFEDEnvName = "USE_HOST_MOFED" // MetricsConfigMountPath indicates mount path for custom dcgm metrics file MetricsConfigMountPath = "/etc/dcgm-exporter/" + MetricsConfigFileName // MetricsConfigFileName indicates custom dcgm metrics file name MetricsConfigFileName = "dcgm-metrics.csv" // NvidiaAnnotationHashKey indicates annotation name for last applied hash by gpu-operator NvidiaAnnotationHashKey = "nvidia.com/last-applied-hash" // NvidiaDisableRequireEnvName is the env name to disable default cuda constraints NvidiaDisableRequireEnvName = "NVIDIA_DISABLE_REQUIRE" // GDSEnabledEnvName is the env name to enable GDS support with device-plugin GDSEnabledEnvName = "GDS_ENABLED" // MOFEDEnabledEnvName is the env name to enable MOFED devices injection with device-plugin MOFEDEnabledEnvName = "MOFED_ENABLED" // GDRCopyEnabledEnvName is the envvar that enables injection of the GDRCopy device node with the device-plugin GDRCopyEnabledEnvName = "GDRCOPY_ENABLED" // ServiceMonitorCRDName is the name of the CRD defining the ServiceMonitor kind ServiceMonitorCRDName = "servicemonitors.monitoring.coreos.com" // DefaultToolkitInstallDir is the default toolkit installation directory on the host DefaultToolkitInstallDir = "/usr/local/nvidia" // ToolkitInstallDirEnvName is the name of the toolkit container env for configuring where NVIDIA Container Toolkit is installed ToolkitInstallDirEnvName = "ROOT" // VgpuDMDefaultConfigMapName indicates name of ConfigMap containing default vGPU devices configuration VgpuDMDefaultConfigMapName = "default-vgpu-devices-config" // VgpuDMDefaultConfigName indicates name of default configuration in the vGPU devices config file VgpuDMDefaultConfigName = "default" // NvidiaCtrRuntimeModeEnvName is the name of the toolkit container env for configuring the NVIDIA Container Runtime mode NvidiaCtrRuntimeModeEnvName = "NVIDIA_CONTAINER_RUNTIME_MODE" // NvidiaCtrRuntimeCDIPrefixesEnvName is the name of toolkit container env for configuring the CDI 
annotation prefixes NvidiaCtrRuntimeCDIPrefixesEnvName = "NVIDIA_CONTAINER_RUNTIME_MODES_CDI_ANNOTATION_PREFIXES" // CDIEnabledEnvName is the name of the envvar used to enable CDI in the operands CDIEnabledEnvName = "CDI_ENABLED" // NvidiaCDIHookPathEnvName is the name of the envvar specifying the path to the 'nvidia-cdi-hook' binary NvidiaCDIHookPathEnvName = "NVIDIA_CDI_HOOK_PATH" // CRIOConfigModeEnvName is the name of the envvar controlling how the toolkit container updates the cri-o configuration CRIOConfigModeEnvName = "CRIO_CONFIG_MODE" // CDIEnableNRIPlugin is the name of the env var for enabling NRI Plugin in the toolkit CDIEnableNRIPlugin = "ENABLE_NRI_PLUGIN" // DeviceListStrategyEnvName is the name of the envvar for configuring the device-list-strategy in the device-plugin DeviceListStrategyEnvName = "DEVICE_LIST_STRATEGY" // CDIAnnotationPrefixEnvName is the name of the device-plugin envvar for configuring the CDI annotation prefix CDIAnnotationPrefixEnvName = "CDI_ANNOTATION_PREFIX" // KataManagerAnnotationHashKey is the annotation indicating the hash of the kata-manager configuration KataManagerAnnotationHashKey = "nvidia.com/kata-manager.last-applied-hash" // DefaultKataArtifactsDir is the default directory to store kata artifacts on the host DefaultKataArtifactsDir = "/opt/nvidia-gpu-operator/artifacts/runtimeclasses/" // PodControllerRevisionHashLabelKey is the annotation key for pod controller revision hash value PodControllerRevisionHashLabelKey = "controller-revision-hash" // DefaultCCModeEnvName is the name of the envvar for configuring default CC mode on all compatible GPUs on the node DefaultCCModeEnvName = "DEFAULT_CC_MODE" // OpenKernelModulesEnabledEnvName is the name of the driver-container envvar for enabling open GPU kernel module support OpenKernelModulesEnabledEnvName = "OPEN_KERNEL_MODULES_ENABLED" // KernelModuleTypeEnvName is the name of the driver-container envvar to set the desired kernel module type KernelModuleTypeEnvName = 
"KERNEL_MODULE_TYPE" // MPSRootEnvName is the name of the envvar for configuring the MPS root MPSRootEnvName = "MPS_ROOT" // DefaultMPSRoot is the default MPS root path on the host DefaultMPSRoot = "/run/nvidia/mps" // HostRootEnvName is the name of the envvar representing the root path of the underlying host HostRootEnvName = "HOST_ROOT" // DefaultDriverInstallDir represents the default path of a driver container installation DefaultDriverInstallDir = "/run/nvidia/driver" // DriverInstallDirEnvName is the name of the envvar used by the driver-validator to represent the driver install dir DriverInstallDirEnvName = "DRIVER_INSTALL_DIR" // DriverInstallDirCtrPathEnvName is the name of the envvar used by the driver-validator to represent the path // of the driver install dir mounted in the container DriverInstallDirCtrPathEnvName = "DRIVER_INSTALL_DIR_CTR_PATH" // NvidiaRuntimeSetAsDefaultEnvName is the name of the toolkit container env for configuring NVIDIA Container Runtime as the default runtime NvidiaRuntimeSetAsDefaultEnvName = "NVIDIA_RUNTIME_SET_AS_DEFAULT" // NRIAnnotationDomain represents the domain name used for NRI annotations used for CDI device injections NRIAnnotationDomain = "nvidia.cdi.k8s.io" )
const ( // DriverLabelKey indicates pod label key of the driver DriverLabelKey = "app" // DriverLabelValue indicates pod label value of the driver DriverLabelValue = "nvidia-driver-daemonset" // UpgradeSkipDrainLabelSelector indicates the pod selector label to skip with drain UpgradeSkipDrainLabelSelector = "nvidia.com/gpu-driver-upgrade-drain.skip!=true" // AppComponentLabelKey indicates the label key of the component AppComponentLabelKey = "app.kubernetes.io/component" // AppComponentLabelValue indicates the label values of the nvidia-gpu-driver component AppComponentLabelValue = "nvidia-driver" )
Variables ¶
var CertConfigPathMap = map[string]string{
"centos": "/etc/pki/ca-trust/extracted/pem",
"debian": "/usr/local/share/ca-certificates",
"ubuntu": "/usr/local/share/ca-certificates",
"rhcos": "/etc/pki/ca-trust/extracted/pem",
"rhel": "/etc/pki/ca-trust/extracted/pem",
"rocky": "/etc/pki/ca-trust/extracted/pem",
"sles": "/etc/pki/trust/anchors",
"sl-micro": "/etc/pki/trust/anchors",
}
CertConfigPathMap indicates the standard OS-specific paths for SSL keys/certificates. Where Go looks for certs: https://golang.org/src/crypto/x509/root_linux.go Where OCP mounts proxy certs on RHCOS nodes: https://access.redhat.com/documentation/en-us/openshift_container_platform/4.3/html/authentication/ocp-certificates#proxy-certificates_ocp-certificates
var RepoConfigPathMap = map[string]string{
"centos": "/etc/yum.repos.d",
"debian": "/etc/apt/sources.list.d",
"ubuntu": "/etc/apt/sources.list.d",
"rhcos": "/etc/yum.repos.d",
"rhel": "/etc/yum.repos.d",
"rocky": "/etc/yum.repos.d",
"sles": "/etc/zypp/repos.d",
"sl-micro": "/etc/zypp/repos.d",
}
RepoConfigPathMap indicates the standard OS-specific paths for repository configuration files.
var SubscriptionPathMap = map[string](MountPathToVolumeSource){ "rhel": { "/run/secrets/etc-pki-entitlement": corev1.VolumeSource{ HostPath: &corev1.HostPathVolumeSource{ Path: "/etc/pki/entitlement", Type: ptr.To(corev1.HostPathDirectory), }, }, "/run/secrets/redhat.repo": corev1.VolumeSource{ HostPath: &corev1.HostPathVolumeSource{ Path: "/etc/yum.repos.d/redhat.repo", Type: ptr.To(corev1.HostPathFile), }, }, "/run/secrets/rhsm": corev1.VolumeSource{ HostPath: &corev1.HostPathVolumeSource{ Path: "/etc/rhsm", Type: ptr.To(corev1.HostPathDirectory), }, }, }, "rhcos": { "/run/secrets/etc-pki-entitlement": corev1.VolumeSource{ HostPath: &corev1.HostPathVolumeSource{ Path: "/etc/pki/entitlement", Type: ptr.To(corev1.HostPathDirectory), }, }, "/run/secrets/redhat.repo": corev1.VolumeSource{ HostPath: &corev1.HostPathVolumeSource{ Path: "/etc/yum.repos.d/redhat.repo", Type: ptr.To(corev1.HostPathFile), }, }, "/run/secrets/rhsm": corev1.VolumeSource{ HostPath: &corev1.HostPathVolumeSource{ Path: "/etc/rhsm", Type: ptr.To(corev1.HostPathDirectory), }, }, }, "sles": { "/etc/zypp/credentials.d": corev1.VolumeSource{ HostPath: &corev1.HostPathVolumeSource{ Path: "/etc/zypp/credentials.d", Type: ptr.To(corev1.HostPathDirectory), }, }, "/etc/SUSEConnect": corev1.VolumeSource{ HostPath: &corev1.HostPathVolumeSource{ Path: "/etc/SUSEConnect", Type: ptr.To(corev1.HostPathFileOrCreate), }, }, }, "sl-micro": { "/etc/zypp/credentials.d": corev1.VolumeSource{ HostPath: &corev1.HostPathVolumeSource{ Path: "/etc/zypp/credentials.d", Type: ptr.To(corev1.HostPathDirectory), }, }, "/etc/SUSEConnect": corev1.VolumeSource{ HostPath: &corev1.HostPathVolumeSource{ Path: "/etc/SUSEConnect", Type: ptr.To(corev1.HostPathFileOrCreate), }, }, }, }
SubscriptionPathMap contains information on OS-specific paths that provide entitlements/subscription details on the host. These are used to enable Driver Container's access to packages controlled by the distro through their subscription and support program.
Functions ¶
func ClusterRole ¶
func ClusterRole(n ClusterPolicyController) (gpuv1.State, error)
ClusterRole creates ClusterRole resource
func ClusterRoleBinding ¶
func ClusterRoleBinding(n ClusterPolicyController) (gpuv1.State, error)
ClusterRoleBinding creates ClusterRoleBinding resource
func ConfigMaps ¶ added in v1.9.0
func ConfigMaps(n ClusterPolicyController) (gpuv1.State, error)
ConfigMaps creates ConfigMap resource(s)
func DaemonSet ¶
func DaemonSet(n ClusterPolicyController) (gpuv1.State, error)
DaemonSet creates DaemonSet resource
func Deployment ¶
func Deployment(n ClusterPolicyController) (gpuv1.State, error)
Deployment creates Deployment resource
func GetClusterWideProxy ¶
func GetClusterWideProxy(ctx context.Context) (*apiconfigv1.Proxy, error)
GetClusterWideProxy returns cluster wide proxy object setup in OCP
func KubernetesVersion ¶
func KubernetesVersion() (string, error)
KubernetesVersion fetches the Kubernetes API server version
func OpenshiftVersion ¶
func OpenshiftVersion(ctx context.Context) (string, error)
OpenshiftVersion fetches the OCP version
func PrometheusRule ¶ added in v1.8.0
func PrometheusRule(n ClusterPolicyController) (gpuv1.State, error)
PrometheusRule creates PrometheusRule object
func RoleBinding ¶
func RoleBinding(n ClusterPolicyController) (gpuv1.State, error)
RoleBinding creates RoleBinding resource
func RuntimeClasses ¶
func RuntimeClasses(n ClusterPolicyController) (gpuv1.State, error)
func SecurityContextConstraints ¶
func SecurityContextConstraints(n ClusterPolicyController) (gpuv1.State, error)
SecurityContextConstraints creates SCC resources
func Service ¶
func Service(n ClusterPolicyController) (gpuv1.State, error)
Service creates Service object
func ServiceAccount ¶
func ServiceAccount(n ClusterPolicyController) (gpuv1.State, error)
ServiceAccount creates ServiceAccount resource
func ServiceMonitor ¶
func ServiceMonitor(n ClusterPolicyController) (gpuv1.State, error)
ServiceMonitor creates ServiceMonitor object
func TransformCCManager ¶
func TransformCCManager(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error
TransformCCManager transforms CC Manager daemonset with required config as per ClusterPolicy
func TransformDCGM ¶ added in v1.8.0
func TransformDCGM(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error
TransformDCGM transforms dcgm daemonset with required config as per ClusterPolicy
func TransformDCGMExporter ¶
func TransformDCGMExporter(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error
TransformDCGMExporter transforms dcgm exporter daemonset with required config as per ClusterPolicy
func TransformDCGMExporterService ¶
func TransformDCGMExporterService(obj *corev1.Service, config *gpuv1.ClusterPolicySpec) error
func TransformDevicePlugin ¶
func TransformDevicePlugin(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error
TransformDevicePlugin transforms k8s-device-plugin daemonset with required config as per ClusterPolicy
func TransformDriver ¶
func TransformDriver(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error
TransformDriver transforms NVIDIA driver daemonset with required config as per ClusterPolicy
func TransformGPUDiscoveryPlugin ¶
func TransformGPUDiscoveryPlugin(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error
TransformGPUDiscoveryPlugin transforms GPU discovery daemonset with required config as per ClusterPolicy
func TransformKataManager ¶
func TransformKataManager(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error
TransformKataManager transforms Kata Manager daemonset with required config as per ClusterPolicy
func TransformMIGManager ¶
func TransformMIGManager(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error
TransformMIGManager transforms MIG Manager daemonset with required config as per ClusterPolicy
func TransformMPSControlDaemon ¶
func TransformMPSControlDaemon(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error
func TransformNodeStatusExporter ¶ added in v1.8.0
func TransformNodeStatusExporter(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error
TransformNodeStatusExporter transforms the node-status-exporter daemonset with required config as per ClusterPolicy
func TransformSandboxDevicePlugin ¶ added in v1.11.0
func TransformSandboxDevicePlugin(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error
TransformSandboxDevicePlugin transforms sandbox-device-plugin daemonset with required config as per ClusterPolicy
func TransformSandboxValidator ¶ added in v1.11.0
func TransformSandboxValidator(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error
TransformSandboxValidator transforms nvidia-sandbox-validator daemonset with required config as per ClusterPolicy
func TransformToolkit ¶
func TransformToolkit(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error
TransformToolkit transforms NVIDIA container-toolkit daemonset with required config as per ClusterPolicy
func TransformVFIOManager ¶ added in v1.11.0
func TransformVFIOManager(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error
TransformVFIOManager transforms VFIO-PCI Manager daemonset with required config as per ClusterPolicy
func TransformVGPUDeviceManager ¶ added in v1.11.0
func TransformVGPUDeviceManager(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error
TransformVGPUDeviceManager transforms VGPU Device Manager daemonset with required config as per ClusterPolicy
func TransformVGPUManager ¶ added in v1.11.0
func TransformVGPUManager(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error
TransformVGPUManager transforms NVIDIA vGPU Manager daemonset with required config as per ClusterPolicy
func TransformValidator ¶
func TransformValidator(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error
TransformValidator transforms nvidia-operator-validator daemonset with required config as per ClusterPolicy
func TransformValidatorComponent ¶
func TransformValidatorComponent(config *gpuv1.ClusterPolicySpec, podSpec *corev1.PodSpec, component string) error
TransformValidatorComponent applies changes to given validator component
func TransformValidatorShared ¶ added in v1.11.0
func TransformValidatorShared(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec) error
TransformValidatorShared applies general transformations to the validator daemonset with required config as per ClusterPolicy
Types ¶
type ClusterPolicyController ¶
type ClusterPolicyController struct {
// contains filtered or unexported fields
}
ClusterPolicyController represents clusterpolicy controller spec for GPU operator
type ClusterPolicyReconciler ¶
type ClusterPolicyReconciler struct {
client.Client
Log logr.Logger
Scheme *runtime.Scheme
Namespace string
// contains filtered or unexported fields
}
ClusterPolicyReconciler reconciles a ClusterPolicy object
func (*ClusterPolicyReconciler) Reconcile ¶
func (r *ClusterPolicyReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error)
Reconcile is part of the main kubernetes reconciliation loop which aims to move the current state of the cluster closer to the desired state. TODO(user): Modify the Reconcile function to compare the state specified by the ClusterPolicy object against the actual cluster state, and then perform operations to make the cluster state reflect the state specified by the user.
For more details, check Reconcile and its Result here: https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.7.0/pkg/reconcile
func (*ClusterPolicyReconciler) SetupWithManager ¶
SetupWithManager sets up the controller with the Manager.
type ContainerProbe ¶
type ContainerProbe string
ContainerProbe defines container probe types
const ( // Startup probe Startup ContainerProbe = "startup" // Liveness probe Liveness ContainerProbe = "liveness" // Readiness probe Readiness ContainerProbe = "readiness" )
type MountPathToVolumeSource ¶
type MountPathToVolumeSource map[string]corev1.VolumeSource
MountPathToVolumeSource maps a container mount path to a VolumeSource
type NVIDIADriverReconciler ¶
type NVIDIADriverReconciler struct {
client.Client
Scheme *runtime.Scheme
ClusterInfo clusterinfo.Interface
Namespace string
// contains filtered or unexported fields
}
NVIDIADriverReconciler reconciles a NVIDIADriver object
func (*NVIDIADriverReconciler) Reconcile ¶
func (r *NVIDIADriverReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error)
Reconcile is part of the main kubernetes reconciliation loop which aims to move the current state of the cluster closer to the desired state. TODO(user): Modify the Reconcile function to compare the state specified by the NVIDIADriver object against the actual cluster state, and then perform operations to make the cluster state reflect the state specified by the user.
For more details, check Reconcile and its Result here: https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.8.3/pkg/reconcile
func (*NVIDIADriverReconciler) SetupWithManager ¶
SetupWithManager sets up the controller with the Manager.
type OpenShiftDriverToolkit ¶ added in v1.9.0
type OpenShiftDriverToolkit struct {
// contains filtered or unexported fields
}
OpenShiftDriverToolkit contains the values required to deploy OpenShift DriverToolkit DaemonSet.
type OperatorMetrics ¶ added in v1.8.0
type OperatorMetrics struct {
// contains filtered or unexported fields
}
OperatorMetrics defines the Prometheus metrics exposed for the operator status
type Resources ¶
type Resources struct {
ServiceAccount corev1.ServiceAccount
Role rbacv1.Role
RoleBinding rbacv1.RoleBinding
ClusterRole rbacv1.ClusterRole
ClusterRoleBinding rbacv1.ClusterRoleBinding
ConfigMaps []corev1.ConfigMap
DaemonSet appsv1.DaemonSet
Deployment appsv1.Deployment
Pod corev1.Pod
Service corev1.Service
ServiceMonitor promv1.ServiceMonitor
PriorityClass schedv1.PriorityClass
Taint corev1.Taint
SecurityContextConstraints secv1.SecurityContextConstraints
RuntimeClasses []nodev1.RuntimeClass
PrometheusRule promv1.PrometheusRule
}
Resources indicates resources managed by GPU operator
type UpgradeReconciler ¶
type UpgradeReconciler struct {
client.Client
Log logr.Logger
Scheme *runtime.Scheme
StateManager upgrade.ClusterUpgradeStateManager
}
UpgradeReconciler reconciles Driver Daemon Sets for upgrade
func (*UpgradeReconciler) Reconcile ¶
Reconcile is part of the main kubernetes reconciliation loop which aims to move the current state of the cluster closer to the desired state.
func (*UpgradeReconciler) SetupWithManager ¶
SetupWithManager sets up the controller with the Manager.