Documentation
¶
Index ¶
- Constants
- func GetDiskInfo(path string) (total int64)
- func GetTotalHostRAMBytes() int64
- func GoLog(level *C.char, message *C.char)
- type AcceleratorInterface
- func (a *AcceleratorInterface) AssignPartition(templateID, deviceUUID string) (string, error)
- func (a *AcceleratorInterface) Close() error
- func (a *AcceleratorInterface) GetAllDevices() ([]*api.DeviceInfo, error)
- func (a *AcceleratorInterface) GetDeviceMetrics(deviceUUIDs []string) ([]*api.GPUUsageMetrics, error)
- func (a *AcceleratorInterface) GetProcessInformation() ([]api.ProcessInformation, error)
- func (a *AcceleratorInterface) GetTotalProcessCount() int
- func (a *AcceleratorInterface) GetVendorMountLibs() ([]*api.Mount, error)
- func (a *AcceleratorInterface) Load() error
- func (a *AcceleratorInterface) RemovePartition(partitionUUID, deviceUUID string) error
- func (a *AcceleratorInterface) SetComputeUnitHardLimit(workerID, deviceUUID string, computeUnitLimit uint32) error
- func (a *AcceleratorInterface) SetMemHardLimit(workerID, deviceUUID string, memoryLimitBytes uint64) error
- type Controller
- func (m *Controller) AggregateNodeInfo() *api.NodeInfo
- func (m *Controller) DiscoverDevices() error
- func (m *Controller) GetAcceleratorVendor() string
- func (m *Controller) GetDevice(deviceUUID string) (*api.DeviceInfo, bool)
- func (m *Controller) GetDeviceMetrics() (map[string]*api.GPUUsageMetrics, error)
- func (m *Controller) GetDevices() []*api.DeviceInfo
- func (m *Controller) GetProcessInformation() ([]api.ProcessInformation, error)
- func (m *Controller) GetVendorMountLibs() ([]*api.Mount, error)
- func (m *Controller) ListDevices() ([]*api.DeviceInfo, error)
- func (m *Controller) RegisterDeviceUpdateHandler(handler framework.DeviceChangeHandler)
- func (m *Controller) RemovePartitionedDevice(partitionUUID, deviceUUID string) error
- func (m *Controller) SetAllocationController(allocationController framework.WorkerAllocationController)
- func (m *Controller) SplitDevice(deviceUUID string, partitionTemplateID string) (*api.DeviceInfo, error)
- func (m *Controller) Start() error
- func (m *Controller) StartDiscoverDevices() error
- func (m *Controller) Stop() error
- type DeviceBasicInfo
- type DeviceMetrics
- type DeviceProperties
- type DevicePropertyKV
- type DeviceTopology
- type ExtendedDeviceInfo
- type ExtendedDeviceTopology
- type ExtraMetric
- type Mount
- type PartitionAssignment
- type ProcessArray
- type ProcessInformation
- type Result
- type VirtualizationCapabilities
Constants ¶
const MaxDeviceProperties = 64
const MaxExtraMetrics = 64
const MaxMountPath = 512
const MaxProcesses = 1024
const (
MaxTopologyDevices = 64
)
Variables ¶
This section is empty.
Functions ¶
func GetDiskInfo ¶
func GetTotalHostRAMBytes ¶
func GetTotalHostRAMBytes() int64
Types ¶
type AcceleratorInterface ¶
type AcceleratorInterface struct {
// contains filtered or unexported fields
}
AcceleratorInterface provides Go bindings for the C accelerator library using purego
func NewAcceleratorInterface ¶
func NewAcceleratorInterface(libPath string) (*AcceleratorInterface, error)
NewAcceleratorInterface creates a new accelerator interface and loads the library
func (*AcceleratorInterface) AssignPartition ¶
func (a *AcceleratorInterface) AssignPartition(templateID, deviceUUID string) (string, error)
AssignPartition assigns a partition to a device
func (*AcceleratorInterface) Close ¶
func (a *AcceleratorInterface) Close() error
Close unloads the accelerator library
func (*AcceleratorInterface) GetAllDevices ¶
func (a *AcceleratorInterface) GetAllDevices() ([]*api.DeviceInfo, error)
GetAllDevices retrieves all available devices from the accelerator library
func (*AcceleratorInterface) GetDeviceMetrics ¶
func (a *AcceleratorInterface) GetDeviceMetrics(deviceUUIDs []string) ([]*api.GPUUsageMetrics, error)
GetDeviceMetrics retrieves device metrics for the specified device UUIDs
func (*AcceleratorInterface) GetProcessInformation ¶
func (a *AcceleratorInterface) GetProcessInformation() ([]api.ProcessInformation, error)
GetProcessInformation retrieves process information (compute and memory utilization) for all processes on all devices. This combines the functionality of GetProcessComputeUtilization and GetProcessMemoryUtilization following AMD SMI style API design. Note: This directly calls the C API which returns all GPU processes, regardless of what Go tracks internally.
func (*AcceleratorInterface) GetTotalProcessCount ¶
func (a *AcceleratorInterface) GetTotalProcessCount() int
GetTotalProcessCount returns the total number of processes across all devices
func (*AcceleratorInterface) GetVendorMountLibs ¶
func (a *AcceleratorInterface) GetVendorMountLibs() ([]*api.Mount, error)
GetVendorMountLibs retrieves vendor mount libs
func (*AcceleratorInterface) Load ¶
func (a *AcceleratorInterface) Load() error
Load loads the accelerator library dynamically using purego
func (*AcceleratorInterface) RemovePartition ¶
func (a *AcceleratorInterface) RemovePartition(partitionUUID, deviceUUID string) error
RemovePartition removes a partition from a device
func (*AcceleratorInterface) SetComputeUnitHardLimit ¶
func (a *AcceleratorInterface) SetComputeUnitHardLimit(workerID, deviceUUID string, computeUnitLimit uint32) error
SetComputeUnitHardLimit sets hard compute unit limit for a worker
func (*AcceleratorInterface) SetMemHardLimit ¶
func (a *AcceleratorInterface) SetMemHardLimit(workerID, deviceUUID string, memoryLimitBytes uint64) error
SetMemHardLimit sets hard memory limit for a worker
type Controller ¶
type Controller struct {
// contains filtered or unexported fields
}
Controller manages GPU device discovery and lifecycle
func NewController ¶
func NewController(ctx context.Context, acceleratorLibPath string, acceleratorVendor string, discoveryInterval time.Duration, isolationMode string) (*Controller, error)
NewController creates a new Controller, which manages GPU device discovery and lifecycle.
func (*Controller) AggregateNodeInfo ¶
func (m *Controller) AggregateNodeInfo() *api.NodeInfo
func (*Controller) DiscoverDevices ¶
func (m *Controller) DiscoverDevices() error
DiscoverDevices implements framework.DeviceController
func (*Controller) GetAcceleratorVendor ¶
func (m *Controller) GetAcceleratorVendor() string
func (*Controller) GetDevice ¶
func (m *Controller) GetDevice(deviceUUID string) (*api.DeviceInfo, bool)
GetDevice implements framework.DeviceController
func (*Controller) GetDeviceMetrics ¶
func (m *Controller) GetDeviceMetrics() (map[string]*api.GPUUsageMetrics, error)
GetDeviceMetrics implements framework.DeviceController
func (*Controller) GetDevices ¶
func (m *Controller) GetDevices() []*api.DeviceInfo
GetDevices returns all discovered devices
func (*Controller) GetProcessInformation ¶
func (m *Controller) GetProcessInformation() ([]api.ProcessInformation, error)
GetProcessInformation implements framework.DeviceController. It returns process-level GPU metrics for all processes on all devices.
func (*Controller) GetVendorMountLibs ¶
func (m *Controller) GetVendorMountLibs() ([]*api.Mount, error)
func (*Controller) ListDevices ¶
func (m *Controller) ListDevices() ([]*api.DeviceInfo, error)
ListDevices implements framework.DeviceController
func (*Controller) RegisterDeviceUpdateHandler ¶
func (m *Controller) RegisterDeviceUpdateHandler(handler framework.DeviceChangeHandler)
func (*Controller) RemovePartitionedDevice ¶
func (m *Controller) RemovePartitionedDevice(partitionUUID, deviceUUID string) error
func (*Controller) SetAllocationController ¶
func (m *Controller) SetAllocationController(allocationController framework.WorkerAllocationController)
SetAllocationController sets the allocation controller for telemetry purposes
func (*Controller) SplitDevice ¶
func (m *Controller) SplitDevice(deviceUUID string, partitionTemplateID string) (*api.DeviceInfo, error)
func (*Controller) Start ¶
func (m *Controller) Start() error
Start implements framework.DeviceController
func (*Controller) StartDiscoverDevices ¶
func (m *Controller) StartDiscoverDevices() error
StartDiscoverDevices discovers all available GPU devices.
func (*Controller) Stop ¶
func (m *Controller) Stop() error
type DeviceBasicInfo ¶
type DeviceBasicInfo struct {
UUID [64]byte // C: char uuid[64]
Vendor [32]byte // C: char vendor[32]
Model [128]byte // C: char model[128]
DriverVersion [64]byte // C: char driverVersion[64]
FirmwareVersion [64]byte // C: char firmwareVersion[64]
Index int32 // C: int32_t index
NUMANode int32 // C: int32_t numaNode
TotalMemoryBytes uint64 // C: uint64_t totalMemoryBytes
TotalComputeUnits uint64 // C: uint64_t totalComputeUnits
MaxTflops float64 // C: double maxTflops
PCIeGen uint32 // C: uint32_t pcieGen
PCIeWidth uint32 // C: uint32_t pcieWidth
}
DeviceBasicInfo matches the C struct DeviceBasicInfo in vgpu-provider/accelerator.h. Field names in Go are capitalized for export, but the memory layout must match the C struct exactly. C struct fields: uuid, vendor, model, driverVersion, firmwareVersion, index, numaNode,
totalMemoryBytes, totalComputeUnits, maxTflops, pcieGen, pcieWidth.
type DeviceMetrics ¶
type DeviceMetrics struct {
DeviceUUID [64]byte
PowerUsageWatts float64
TemperatureCelsius float64
PCIeRxBytes uint64
PCIeTxBytes uint64
UtilizationPercent uint32
MemoryUsedBytes uint64
ExtraMetrics [MaxExtraMetrics]ExtraMetric
ExtraMetricsCount uintptr
}
type DeviceProperties ¶
type DeviceProperties struct {
Properties [MaxDeviceProperties]DevicePropertyKV
Count uintptr
}
type DevicePropertyKV ¶
type DeviceTopology ¶
type ExtendedDeviceInfo ¶
type ExtendedDeviceInfo struct {
Basic DeviceBasicInfo
Props DeviceProperties
Capabilities VirtualizationCapabilities
}
type ExtendedDeviceTopology ¶
type ExtendedDeviceTopology struct {
Devices [MaxTopologyDevices]DeviceTopology
DeviceCount uintptr
TopologyType [32]byte
}
type ExtraMetric ¶
type Mount ¶
type Mount struct {
HostPath [MaxMountPath]byte
GuestPath [MaxMountPath]byte
}
type PartitionAssignment ¶
type ProcessArray ¶
type ProcessArray struct {
ProcessIDs [MaxProcesses]int32
ProcessCount uintptr
DeviceUUID [64]byte
}
type ProcessInformation ¶
type ProcessInformation struct {
ProcessID [32]byte
DeviceUUID [64]byte
ComputeUtilizationPercent float64
ActiveSMs uint64
TotalSMs uint64
MemoryUsedBytes uint64
MemoryReservedBytes uint64
MemoryUtilizationPercent float64
}
ProcessInformation combines compute and memory utilization (AMD SMI style)