gpu

package
v0.0.0-...-2fd9d23 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 9, 2026 License: AGPL-3.0 Imports: 36 Imported by: 0

Documentation

Index

Constants

View Source
const (
	GpuLabelGroup = "gpu.bytetrade.io"
)

Variables

View Source
var (
	GpuDriverLabel        = GpuLabelGroup + "/driver"
	GpuCudaLabel          = GpuLabelGroup + "/cuda"
	GpuCudaSupportedLabel = GpuLabelGroup + "/cuda-supported"
)

Functions

func UpdateNodeGpuLabel

func UpdateNodeGpuLabel(ctx context.Context, client kubernetes.Interface, driver, cuda *string, supported *string) error

UpdateNodeGpuLabel updates the k8s node labels gpu.bytetrade.io/driver and gpu.bytetrade.io/cuda. If the labels do not exist, it creates them.

Types

type CheckGpuStatus

type CheckGpuStatus struct {
	common.KubeAction
}

func (*CheckGpuStatus) Execute

func (t *CheckGpuStatus) Execute(runtime connector.Runtime) error

type CheckWslGPU

type CheckWslGPU struct {
}

func (*CheckWslGPU) CheckNvidiaSmiFileExists

func (t *CheckWslGPU) CheckNvidiaSmiFileExists() bool

func (*CheckWslGPU) Execute

func (t *CheckWslGPU) Execute(runtime *common.KubeRuntime)

type ConfigureContainerdRuntime

type ConfigureContainerdRuntime struct {
	common.KubeAction
}

func (*ConfigureContainerdRuntime) Execute

func (t *ConfigureContainerdRuntime) Execute(runtime connector.Runtime) error

type ContainerdInstalled

type ContainerdInstalled struct {
	common.KubePrepare
}

func (*ContainerdInstalled) PreCheck

func (p *ContainerdInstalled) PreCheck(runtime connector.Runtime) (bool, error)

type CudaInstalled

type CudaInstalled struct {
	common.KubePrepare
}

func (*CudaInstalled) PreCheck

func (p *CudaInstalled) PreCheck(runtime connector.Runtime) (bool, error)

type CudaNotInstalled

type CudaNotInstalled struct {
	common.KubePrepare
}

func (*CudaNotInstalled) PreCheck

func (p *CudaNotInstalled) PreCheck(runtime connector.Runtime) (bool, error)

type CurrentNodeInK8s

type CurrentNodeInK8s struct {
	common.KubePrepare
}

func (*CurrentNodeInK8s) PreCheck

func (p *CurrentNodeInK8s) PreCheck(runtime connector.Runtime) (bool, error)

type DisableNouveauModule

type DisableNouveauModule struct {
	common.KubeModule
}

func (*DisableNouveauModule) Init

func (m *DisableNouveauModule) Init()

type GPUEnablePrepare

type GPUEnablePrepare struct {
	common.KubePrepare
}

func (*GPUEnablePrepare) PreCheck

func (p *GPUEnablePrepare) PreCheck(runtime connector.Runtime) (bool, error)

type GpuDevicePluginInstalled

type GpuDevicePluginInstalled struct {
	common.KubePrepare
}

func (*GpuDevicePluginInstalled) PreCheck

func (p *GpuDevicePluginInstalled) PreCheck(runtime connector.Runtime) (bool, error)

type InstallContainerToolkitModule

type InstallContainerToolkitModule struct {
	common.KubeModule
	manifest.ManifestModule
	Skip          bool // enableGPU && ubuntuVersionSupport
	SkipCudaCheck bool
}

func (*InstallContainerToolkitModule) Init

func (*InstallContainerToolkitModule) IsSkip

func (m *InstallContainerToolkitModule) IsSkip() bool

type InstallCudaDriver

type InstallCudaDriver struct {
	common.KubeAction
	manifest.ManifestAction
}

func (*InstallCudaDriver) Execute

func (t *InstallCudaDriver) Execute(runtime connector.Runtime) error

type InstallDriversModule

type InstallDriversModule struct {
	common.KubeModule
	manifest.ManifestModule
	Skip bool // enableGPU && ubuntuVersionSupport

	// Log a failure message and then exit,
	// instead of silently skipping the jobs, when:
	// 1. no card is found (which skips the driver installation)
	// 2. no driver is found (which skips the container toolkit installation)
	FailOnNoInstallation bool
}

func (*InstallDriversModule) Init

func (m *InstallDriversModule) Init()

func (*InstallDriversModule) IsSkip

func (m *InstallDriversModule) IsSkip() bool

type InstallNvidiaContainerToolkit

type InstallNvidiaContainerToolkit struct {
	common.KubeAction
}

func (*InstallNvidiaContainerToolkit) Execute

type InstallPlugin

type InstallPlugin struct {
	common.KubeAction
}

func (*InstallPlugin) Execute

func (t *InstallPlugin) Execute(runtime connector.Runtime) error

type InstallPluginModule

type InstallPluginModule struct {
	common.KubeModule
	Skip bool // enableGPU && ubuntuVersionSupport
}

func (*InstallPluginModule) Init

func (m *InstallPluginModule) Init()

func (*InstallPluginModule) IsSkip

func (m *InstallPluginModule) IsSkip() bool

type NodeLabelingModule

type NodeLabelingModule struct {
	common.KubeModule
}

func (*NodeLabelingModule) Init

func (l *NodeLabelingModule) Init()

type NodeUnlabelingModule

type NodeUnlabelingModule struct {
	common.KubeModule
}

func (*NodeUnlabelingModule) Init

func (l *NodeUnlabelingModule) Init()

type NvidiaGraphicsCard

type NvidiaGraphicsCard struct {
	common.KubePrepare
	ExitOnNotFound bool
}

func (*NvidiaGraphicsCard) PreCheck

func (p *NvidiaGraphicsCard) PreCheck(runtime connector.Runtime) (found bool, err error)

type PatchK3sDriver

type PatchK3sDriver struct {
	common.KubeAction
}

func (*PatchK3sDriver) Execute

func (t *PatchK3sDriver) Execute(runtime connector.Runtime) error

type PrintGpuStatus

type PrintGpuStatus struct {
	common.KubeAction
}

func (*PrintGpuStatus) Execute

func (t *PrintGpuStatus) Execute(runtime connector.Runtime) error

type PrintPluginsStatus

type PrintPluginsStatus struct {
	common.KubeAction
}

func (*PrintPluginsStatus) Execute

func (t *PrintPluginsStatus) Execute(runtime connector.Runtime) error

type RemoveContainerRuntimeConfig

type RemoveContainerRuntimeConfig struct {
	common.KubeAction
}

func (*RemoveContainerRuntimeConfig) Execute

type RemoveNodeLabels

type RemoveNodeLabels struct {
	common.KubeAction
}

func (*RemoveNodeLabels) Execute

func (u *RemoveNodeLabels) Execute(runtime connector.Runtime) error

type RestartContainerdModule

type RestartContainerdModule struct {
	common.KubeModule
	Skip bool // enableGPU && ubuntuVersionSupport
}

func (*RestartContainerdModule) Init

func (m *RestartContainerdModule) Init()

func (*RestartContainerdModule) IsSkip

func (m *RestartContainerdModule) IsSkip() bool

type RestartK3sServiceModule

type RestartK3sServiceModule struct {
	common.KubeModule
	Skip bool // enableGPU && ubuntuVersionSupport
}

func (*RestartK3sServiceModule) Init

func (m *RestartK3sServiceModule) Init()

func (*RestartK3sServiceModule) IsSkip

func (m *RestartK3sServiceModule) IsSkip() bool

type RestartPlugin

type RestartPlugin struct {
	common.KubeAction
}

func (*RestartPlugin) Execute

func (t *RestartPlugin) Execute(runtime connector.Runtime) error

type UninstallCudaModule

type UninstallCudaModule struct {
	common.KubeModule
}

func (*UninstallCudaModule) Init

func (l *UninstallCudaModule) Init()

type UninstallNvidiaDrivers

type UninstallNvidiaDrivers struct {
	common.KubeAction
}

func (*UninstallNvidiaDrivers) Execute

func (t *UninstallNvidiaDrivers) Execute(runtime connector.Runtime) error

type UpdateNodeGPUInfo

type UpdateNodeGPUInfo struct {
	common.KubeAction
}

func (*UpdateNodeGPUInfo) Execute

func (u *UpdateNodeGPUInfo) Execute(runtime connector.Runtime) error

type UpdateNvidiaContainerToolkitSource

type UpdateNvidiaContainerToolkitSource struct {
	common.KubeAction
	manifest.ManifestAction
}

func (*UpdateNvidiaContainerToolkitSource) Execute

type WriteNouveauBlacklist

type WriteNouveauBlacklist struct {
	common.KubeAction
}

func (*WriteNouveauBlacklist) Execute

func (t *WriteNouveauBlacklist) Execute(runtime connector.Runtime) error

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL