Documentation
¶
Index ¶
- Constants
- Variables
- func UpdateNodeGpuLabel(ctx context.Context, client kubernetes.Interface, driver, cuda *string, ...) error
- type CheckGpuStatus
- type CheckWslGPU
- type ConfigureContainerdRuntime
- type ContainerdInstalled
- type CudaInstalled
- type CudaNotInstalled
- type CurrentNodeInK8s
- type DisableNouveauModule
- type GPUEnablePrepare
- type GpuDevicePluginInstalled
- type InstallContainerToolkitModule
- type InstallCudaDriver
- type InstallDriversModule
- type InstallNvidiaContainerToolkit
- type InstallPlugin
- type InstallPluginModule
- type NodeLabelingModule
- type NodeUnlabelingModule
- type NvidiaGraphicsCard
- type PatchK3sDriver
- type PrintGpuStatus
- type PrintPluginsStatus
- type RemoveContainerRuntimeConfig
- type RemoveNodeLabels
- type RestartContainerdModule
- type RestartK3sServiceModule
- type RestartPlugin
- type UninstallCudaModule
- type UninstallNvidiaDrivers
- type UpdateNodeGPUInfo
- type UpdateNvidiaContainerToolkitSource
- type WriteNouveauBlacklist
Constants ¶
View Source
const (
GpuLabelGroup = "gpu.bytetrade.io"
)
Variables ¶
View Source
var (
GpuDriverLabel = GpuLabelGroup + "/driver"
GpuCudaLabel = GpuLabelGroup + "/cuda"
GpuCudaSupportedLabel = GpuLabelGroup + "/cuda-supported"
)
Functions ¶
func UpdateNodeGpuLabel ¶
func UpdateNodeGpuLabel(ctx context.Context, client kubernetes.Interface, driver, cuda *string, supported *string) error
UpdateNodeGpuLabel updates the k8s node labels gpu.bytetrade.io/driver and gpu.bytetrade.io/cuda. If the labels do not exist, they are created.
Types ¶
type CheckGpuStatus ¶
type CheckGpuStatus struct {
common.KubeAction
}
type CheckWslGPU ¶
type CheckWslGPU struct {
}
func (*CheckWslGPU) CheckNvidiaSmiFileExists ¶
func (t *CheckWslGPU) CheckNvidiaSmiFileExists() bool
func (*CheckWslGPU) Execute ¶
func (t *CheckWslGPU) Execute(runtime *common.KubeRuntime)
type ConfigureContainerdRuntime ¶
type ConfigureContainerdRuntime struct {
common.KubeAction
}
type ContainerdInstalled ¶
type ContainerdInstalled struct {
common.KubePrepare
}
type CudaInstalled ¶
type CudaInstalled struct {
common.KubePrepare
}
type CudaNotInstalled ¶
type CudaNotInstalled struct {
common.KubePrepare
}
type CurrentNodeInK8s ¶
type CurrentNodeInK8s struct {
common.KubePrepare
}
type DisableNouveauModule ¶
type DisableNouveauModule struct {
common.KubeModule
}
func (*DisableNouveauModule) Init ¶
func (m *DisableNouveauModule) Init()
type GPUEnablePrepare ¶
type GPUEnablePrepare struct {
common.KubePrepare
}
type GpuDevicePluginInstalled ¶
type GpuDevicePluginInstalled struct {
common.KubePrepare
}
type InstallContainerToolkitModule ¶
type InstallContainerToolkitModule struct {
common.KubeModule
manifest.ManifestModule
Skip bool // enableGPU && ubuntuVersionSupport
SkipCudaCheck bool
}
func (*InstallContainerToolkitModule) Init ¶
func (m *InstallContainerToolkitModule) Init()
func (*InstallContainerToolkitModule) IsSkip ¶
func (m *InstallContainerToolkitModule) IsSkip() bool
type InstallCudaDriver ¶
type InstallCudaDriver struct {
common.KubeAction
manifest.ManifestAction
}
type InstallDriversModule ¶
type InstallDriversModule struct {
common.KubeModule
manifest.ManifestModule
Skip bool // enableGPU && ubuntuVersionSupport
// log a failure message and then exit
// instead of silently skipping the jobs when:
// 1. no card is found (which skips the driver installation)
// 2. no driver is found (which skips the container toolkit installation)
FailOnNoInstallation bool
}
func (*InstallDriversModule) Init ¶
func (m *InstallDriversModule) Init()
func (*InstallDriversModule) IsSkip ¶
func (m *InstallDriversModule) IsSkip() bool
type InstallNvidiaContainerToolkit ¶
type InstallNvidiaContainerToolkit struct {
common.KubeAction
}
type InstallPlugin ¶
type InstallPlugin struct {
common.KubeAction
}
type InstallPluginModule ¶
type InstallPluginModule struct {
common.KubeModule
Skip bool // enableGPU && ubuntuVersionSupport
}
func (*InstallPluginModule) Init ¶
func (m *InstallPluginModule) Init()
func (*InstallPluginModule) IsSkip ¶
func (m *InstallPluginModule) IsSkip() bool
type NodeLabelingModule ¶
type NodeLabelingModule struct {
common.KubeModule
}
func (*NodeLabelingModule) Init ¶
func (l *NodeLabelingModule) Init()
type NodeUnlabelingModule ¶
type NodeUnlabelingModule struct {
common.KubeModule
}
func (*NodeUnlabelingModule) Init ¶
func (l *NodeUnlabelingModule) Init()
type NvidiaGraphicsCard ¶
type NvidiaGraphicsCard struct {
common.KubePrepare
ExitOnNotFound bool
}
type PatchK3sDriver ¶
type PatchK3sDriver struct {
common.KubeAction
}
type PrintGpuStatus ¶
type PrintGpuStatus struct {
common.KubeAction
}
type PrintPluginsStatus ¶
type PrintPluginsStatus struct {
common.KubeAction
}
type RemoveContainerRuntimeConfig ¶
type RemoveContainerRuntimeConfig struct {
common.KubeAction
}
type RemoveNodeLabels ¶
type RemoveNodeLabels struct {
common.KubeAction
}
type RestartContainerdModule ¶
type RestartContainerdModule struct {
common.KubeModule
Skip bool // enableGPU && ubuntuVersionSupport
}
func (*RestartContainerdModule) Init ¶
func (m *RestartContainerdModule) Init()
func (*RestartContainerdModule) IsSkip ¶
func (m *RestartContainerdModule) IsSkip() bool
type RestartK3sServiceModule ¶
type RestartK3sServiceModule struct {
common.KubeModule
Skip bool // enableGPU && ubuntuVersionSupport
}
func (*RestartK3sServiceModule) Init ¶
func (m *RestartK3sServiceModule) Init()
func (*RestartK3sServiceModule) IsSkip ¶
func (m *RestartK3sServiceModule) IsSkip() bool
type RestartPlugin ¶
type RestartPlugin struct {
common.KubeAction
}
type UninstallCudaModule ¶
type UninstallCudaModule struct {
common.KubeModule
}
func (*UninstallCudaModule) Init ¶
func (l *UninstallCudaModule) Init()
type UninstallNvidiaDrivers ¶
type UninstallNvidiaDrivers struct {
common.KubeAction
}
type UpdateNodeGPUInfo ¶
type UpdateNodeGPUInfo struct {
common.KubeAction
}
type UpdateNvidiaContainerToolkitSource ¶
type UpdateNvidiaContainerToolkitSource struct {
common.KubeAction
manifest.ManifestAction
}
type WriteNouveauBlacklist ¶
type WriteNouveauBlacklist struct {
common.KubeAction
}
Click to show internal directories.
Click to hide internal directories.