device

package
v1.55.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 17, 2026 License: Apache-2.0 Imports: 19 Imported by: 0

Documentation

Index

Constants

View Source
// MaxDeviceProperties is the fixed capacity of the DeviceProperties.Properties array.
const MaxDeviceProperties = 64
View Source
// MaxExtraMetrics is the fixed capacity of the DeviceMetrics.ExtraMetrics array.
const MaxExtraMetrics = 64
View Source
// MaxMountPath is the size in bytes of the Mount.HostPath and Mount.GuestPath buffers.
const MaxMountPath = 512
View Source
// MaxProcesses is the fixed capacity of the ProcessArray.ProcessIDs array.
const MaxProcesses = 1024
View Source
const (
	// MaxTopologyDevices is the fixed capacity of the
	// ExtendedDeviceTopology.Devices array.
	MaxTopologyDevices = 64
)

Variables

This section is empty.

Functions

func GetDiskInfo

func GetDiskInfo(path string) (total int64)

func GetTotalHostRAMBytes

func GetTotalHostRAMBytes() int64

func GoLog

func GoLog(level *C.char, message *C.char)

GoLog is exported to C code via the //export directive. It is called by C code (wrapper.c) to log messages using klog.

Types

type AcceleratorInterface

type AcceleratorInterface struct {
	// contains filtered or unexported fields
}

AcceleratorInterface provides Go bindings for the C accelerator library using purego

func NewAcceleratorInterface

func NewAcceleratorInterface(libPath string) (*AcceleratorInterface, error)

NewAcceleratorInterface creates a new accelerator interface and loads the library

func (*AcceleratorInterface) AssignPartition

func (a *AcceleratorInterface) AssignPartition(templateID, deviceUUID string) (string, error)

AssignPartition assigns a partition to a device

func (*AcceleratorInterface) Close

func (a *AcceleratorInterface) Close() error

Close unloads the accelerator library

func (*AcceleratorInterface) GetAllDevices

func (a *AcceleratorInterface) GetAllDevices() ([]*api.DeviceInfo, error)

GetAllDevices retrieves all available devices from the accelerator library

func (*AcceleratorInterface) GetDeviceMetrics

func (a *AcceleratorInterface) GetDeviceMetrics(deviceUUIDs []string) ([]*api.GPUUsageMetrics, error)

GetDeviceMetrics retrieves device metrics for the specified device UUIDs

func (*AcceleratorInterface) GetProcessInformation

func (a *AcceleratorInterface) GetProcessInformation() ([]api.ProcessInformation, error)

GetProcessInformation retrieves process information (compute and memory utilization) for all processes on all devices. This combines the functionality of GetProcessComputeUtilization and GetProcessMemoryUtilization following AMD SMI style API design. Note: This directly calls the C API which returns all GPU processes, regardless of what Go tracks internally.

func (*AcceleratorInterface) GetTotalProcessCount

func (a *AcceleratorInterface) GetTotalProcessCount() int

GetTotalProcessCount returns the total number of processes across all devices

func (*AcceleratorInterface) GetVendorMountLibs

func (a *AcceleratorInterface) GetVendorMountLibs() ([]*api.Mount, error)

GetVendorMountLibs retrieves vendor mount libs

func (*AcceleratorInterface) Load

func (a *AcceleratorInterface) Load() error

Load loads the accelerator library dynamically using purego

func (*AcceleratorInterface) RemovePartition

func (a *AcceleratorInterface) RemovePartition(partitionUUID, deviceUUID string) error

RemovePartition removes a partition from a device

func (*AcceleratorInterface) SetComputeUnitHardLimit

func (a *AcceleratorInterface) SetComputeUnitHardLimit(workerID, deviceUUID string, computeUnitLimit uint32) error

SetComputeUnitHardLimit sets hard compute unit limit for a worker

func (*AcceleratorInterface) SetMemHardLimit

func (a *AcceleratorInterface) SetMemHardLimit(workerID, deviceUUID string, memoryLimitBytes uint64) error

SetMemHardLimit sets hard memory limit for a worker

type Controller

type Controller struct {
	// contains filtered or unexported fields
}

Controller manages GPU device discovery and lifecycle

func NewController

func NewController(ctx context.Context, acceleratorLibPath string, acceleratorVendor string, discoveryInterval time.Duration, isolationMode string) (*Controller, error)

NewController creates a new device manager

func (*Controller) AggregateNodeInfo

func (m *Controller) AggregateNodeInfo() *api.NodeInfo

func (*Controller) DiscoverDevices

func (m *Controller) DiscoverDevices() error

DiscoverDevices implements framework.DeviceController

func (*Controller) GetAcceleratorVendor

func (m *Controller) GetAcceleratorVendor() string

func (*Controller) GetDevice

func (m *Controller) GetDevice(deviceUUID string) (*api.DeviceInfo, bool)

GetDevice implements framework.DeviceController

func (*Controller) GetDeviceMetrics

func (m *Controller) GetDeviceMetrics() (map[string]*api.GPUUsageMetrics, error)

GetDeviceMetrics implements framework.DeviceController

func (*Controller) GetDevices

func (m *Controller) GetDevices() []*api.DeviceInfo

GetDevices returns all discovered devices

func (*Controller) GetProcessInformation

func (m *Controller) GetProcessInformation() ([]api.ProcessInformation, error)

GetProcessInformation implements framework.DeviceController. It returns process-level GPU metrics for all processes on all devices.

func (*Controller) GetVendorMountLibs

func (m *Controller) GetVendorMountLibs() ([]*api.Mount, error)

func (*Controller) ListDevices

func (m *Controller) ListDevices() ([]*api.DeviceInfo, error)

ListDevices implements framework.DeviceController

func (*Controller) RegisterDeviceUpdateHandler

func (m *Controller) RegisterDeviceUpdateHandler(handler framework.DeviceChangeHandler)

func (*Controller) RemovePartitionedDevice

func (m *Controller) RemovePartitionedDevice(partitionUUID, deviceUUID string) error

func (*Controller) SetAllocationController

func (m *Controller) SetAllocationController(allocationController framework.WorkerAllocationController)

SetAllocationController sets the allocation controller for telemetry purposes

func (*Controller) SplitDevice

func (m *Controller) SplitDevice(deviceUUID string, partitionTemplateID string) (*api.DeviceInfo, error)

func (*Controller) Start

func (m *Controller) Start() error

Start implements framework.DeviceController

func (*Controller) StartDiscoverDevices

func (m *Controller) StartDiscoverDevices() error

StartDiscoverDevices discovers all available GPU devices.

func (*Controller) Stop

func (m *Controller) Stop() error

type DeviceBasicInfo

// DeviceBasicInfo mirrors the C struct of the same name. Field order, types,
// and array sizes must stay in step with the C declaration noted on each
// field, because the memory layout has to match exactly.
// NOTE(review): the byte arrays presumably hold NUL-terminated C strings —
// confirm against accelerator.h.
type DeviceBasicInfo struct {
	UUID              [64]byte  // C: char uuid[64]
	Vendor            [32]byte  // C: char vendor[32]
	Model             [128]byte // C: char model[128]
	DriverVersion     [64]byte  // C: char driverVersion[64]
	FirmwareVersion   [64]byte  // C: char firmwareVersion[64]
	Index             int32     // C: int32_t index
	NUMANode          int32     // C: int32_t numaNode
	TotalMemoryBytes  uint64    // C: uint64_t totalMemoryBytes
	TotalComputeUnits uint64    // C: uint64_t totalComputeUnits
	MaxTflops         float64   // C: double maxTflops
	PCIeGen           uint32    // C: uint32_t pcieGen
	PCIeWidth         uint32    // C: uint32_t pcieWidth
}

DeviceBasicInfo matches the C struct DeviceBasicInfo in vgpu-provider/accelerator.h. Field names in Go are capitalized for export, but the memory layout must match the C struct exactly. C struct fields: uuid, vendor, model, driverVersion, firmwareVersion, index, numaNode, totalMemoryBytes, totalComputeUnits, maxTflops, pcieGen, pcieWidth.

type DeviceMetrics

// DeviceMetrics is a fixed-size record of usage readings for one device,
// identified by DeviceUUID.
// NOTE(review): presumably mirrors a C struct in accelerator.h, as
// DeviceBasicInfo does — confirm before reordering or resizing fields.
type DeviceMetrics struct {
	DeviceUUID         [64]byte
	PowerUsageWatts    float64
	TemperatureCelsius float64
	PCIeRxBytes        uint64
	PCIeTxBytes        uint64
	UtilizationPercent uint32
	MemoryUsedBytes    uint64
	// ExtraMetrics has room for MaxExtraMetrics key/value entries.
	ExtraMetrics       [MaxExtraMetrics]ExtraMetric
	// ExtraMetricsCount is presumably the number of populated ExtraMetrics
	// entries — confirm against the C API.
	ExtraMetricsCount  uintptr
}

type DeviceProperties

// DeviceProperties is a fixed-capacity list of key/value properties.
// NOTE(review): presumably mirrors a C struct — confirm before changing layout.
type DeviceProperties struct {
	// Properties has room for MaxDeviceProperties entries.
	Properties [MaxDeviceProperties]DevicePropertyKV
	// Count is presumably the number of populated Properties entries — confirm.
	Count      uintptr
}

type DevicePropertyKV

// DevicePropertyKV is one property entry stored as two fixed-size byte
// buffers: a 64-byte key and a 256-byte value.
// NOTE(review): buffers presumably hold NUL-terminated C strings — confirm.
type DevicePropertyKV struct {
	Key   [64]byte
	Value [256]byte
}

type DeviceTopology

// DeviceTopology pairs a device UUID buffer with a NUMA node number.
// It is the element type of ExtendedDeviceTopology.Devices.
type DeviceTopology struct {
	DeviceUUID [64]byte
	NUMANode   int32
}

type ExtendedDeviceInfo

// ExtendedDeviceInfo bundles a device's basic info, its key/value properties,
// and its virtualization capabilities into a single value.
// NOTE(review): presumably mirrors a C struct — confirm before changing layout.
type ExtendedDeviceInfo struct {
	Basic        DeviceBasicInfo
	Props        DeviceProperties
	Capabilities VirtualizationCapabilities
}

type ExtendedDeviceTopology

// ExtendedDeviceTopology is a fixed-capacity list of per-device topology
// entries plus a 32-byte topology type buffer.
// NOTE(review): presumably mirrors a C struct — confirm before changing layout.
type ExtendedDeviceTopology struct {
	// Devices has room for MaxTopologyDevices entries.
	Devices      [MaxTopologyDevices]DeviceTopology
	// DeviceCount is presumably the number of populated Devices entries — confirm.
	DeviceCount  uintptr
	TopologyType [32]byte
}

type ExtraMetric

// ExtraMetric is one named float64 reading; the name is a 64-byte buffer.
// It is the element type of DeviceMetrics.ExtraMetrics.
type ExtraMetric struct {
	Key   [64]byte
	Value float64
}

type Mount

// Mount holds a host path and a guest path, each in a MaxMountPath-byte buffer.
// It is a separate type from api.Mount, which GetVendorMountLibs returns.
// NOTE(review): buffers presumably hold NUL-terminated C strings — confirm.
type Mount struct {
	HostPath  [MaxMountPath]byte
	GuestPath [MaxMountPath]byte
}

type PartitionAssignment

// PartitionAssignment holds three 64-byte identifier buffers: a template ID,
// a device UUID, and a partition UUID.
// NOTE(review): presumably the in/out record behind AssignPartition, with
// PartitionUUID filled in by the library — confirm against the C API.
type PartitionAssignment struct {
	TemplateID    [64]byte
	DeviceUUID    [64]byte
	PartitionUUID [64]byte
}

type ProcessArray

// ProcessArray is a fixed-capacity list of int32 process IDs together with a
// device UUID buffer.
// NOTE(review): presumably mirrors a C struct — confirm before changing layout.
type ProcessArray struct {
	// ProcessIDs has room for MaxProcesses entries.
	ProcessIDs   [MaxProcesses]int32
	// ProcessCount is presumably the number of populated ProcessIDs entries — confirm.
	ProcessCount uintptr
	DeviceUUID   [64]byte
}

type ProcessInformation

// ProcessInformation is a per-process, per-device record carrying both
// compute and memory utilization figures. It is a separate type from
// api.ProcessInformation, which the GetProcessInformation methods return.
// NOTE(review): presumably mirrors a C struct — confirm before changing layout.
type ProcessInformation struct {
	// ProcessID is a 32-byte buffer rather than an integer; presumably the
	// process ID rendered as a C string — confirm.
	ProcessID                 [32]byte
	DeviceUUID                [64]byte
	ComputeUtilizationPercent float64
	ActiveSMs                 uint64
	TotalSMs                  uint64
	MemoryUsedBytes           uint64
	MemoryReservedBytes       uint64
	MemoryUtilizationPercent  float64
}

ProcessInformation combines compute and memory utilization (AMD SMI style)

type Result

// Result is an int32 status code.
// NOTE(review): presumably the return code of the C accelerator API — confirm.
type Result int32
// Named Result values: ResultSuccess is zero, and values 1 through 6 each
// name a distinct error category.
const (
	ResultSuccess                Result = 0
	ResultErrorInvalidParam      Result = 1
	ResultErrorNotFound          Result = 2
	ResultErrorNotSupported      Result = 3
	ResultErrorResourceExhausted Result = 4
	ResultErrorOperationFailed   Result = 5
	ResultErrorInternal          Result = 6
)

type VirtualizationCapabilities

// VirtualizationCapabilities is a set of boolean feature flags followed by
// two uint32 limits. It is embedded in ExtendedDeviceInfo as Capabilities.
// NOTE(review): presumably mirrors a C struct whose flags are C bool (1 byte
// each) — confirm before changing layout.
type VirtualizationCapabilities struct {
	SupportsPartitioning  bool
	SupportsSoftIsolation bool
	SupportsHardIsolation bool
	SupportsSnapshot      bool
	SupportsMetrics       bool
	SupportsRemoting      bool
	MaxPartitions         uint32
	MaxWorkersPerDevice   uint32
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL