Documentation
¶
Overview ¶
Package gpu collects GPU hardware and driver configuration data using a two-phase detection model.
Two-Phase Collection ¶
The collector runs two independent detection phases, each producing a separate measurement subtype:
Phase 1 ("hardware"): NFD-based PCI enumeration — detects NVIDIA GPUs
via sysfs PCI device scan and checks nvidia kernel module state.
No GPU drivers required. Requires Linux with sysfs mounted.
Phase 2 ("smi"): nvidia-smi XML query — collects driver version, CUDA
version, per-GPU hardware specs, and runtime settings. Requires
nvidia-smi in PATH with a loaded NVIDIA driver.
Phase 1 enables day-0 GPU detection on freshly provisioned nodes where drivers have not yet been installed. Phase 2 provides the full telemetry used for recipe generation and validation.
Graceful Degradation ¶
Each phase degrades independently:
- Phase 1 failure (e.g., no sysfs on macOS): logged as a warning and skipped. Only the "smi" subtype is returned.
- Phase 2 failure (e.g., nvidia-smi not installed): logged as a warning. A zero-GPU "smi" subtype is returned with gpu-count=0.
- Both phases fail: measurement contains only the zero-GPU "smi" subtype.
- No HardwareDetector configured (nil detector): Phase 1 is skipped entirely, preserving the pre-NFD single-phase behavior.
Measurement Structure ¶
A successful two-phase collection produces:
Measurement{
Type: "GPU",
Subtypes: [
{Name: "hardware", Data: {gpu-present, gpu-count, driver-loaded, detection-source}},
{Name: "smi", Data: {gpu-count, driver, cuda-version, gpu.model, ...}},
],
}
The "hardware" subtype keys are defined in pkg/measurement:
- KeyGPUPresent: bool — true if at least one NVIDIA GPU found via PCI
- KeyGPUCount: int — number of NVIDIA GPUs detected
- KeyGPUDriverLoaded: bool — true if nvidia kernel module is loaded
- KeyGPUDetectionSource: string — detection method (e.g., "nfd")
The "smi" subtype contains driver telemetry and per-GPU hardware details.
Usage ¶
The collector is created by the factory with NFD wiring:
collector := gpu.NewCollector(
gpu.WithHardwareDetector(&gpu.NFDHardwareDetector{}),
)
m, err := collector.Collect(ctx)
Without WithHardwareDetector, Phase 1 is skipped (backward compatible).
Context and Timeouts ¶
The collector respects context cancellation and applies a bounded timeout (defaults.CollectorTimeout). NFD detection has its own sub-timeout (defaults.NFDDetectionTimeout = 5s). The context is passed to each phase, so cancellation is respected within each phase's I/O operations.
Platform Support ¶
- Linux with sysfs: Both phases run (full two-phase detection)
- macOS / containers without /sys: Phase 1 fails gracefully; only Phase 2 runs
- No nvidia-smi: Phase 2 returns zero-GPU subtype
Index ¶
- type Aggregate
- type AggregateUncorrectableSramSources
- type ApplicationsClocks
- type Bar1MemoryUsage
- type Capabilities
- type CcProtectedMemoryUsage
- type ClockPolicy
- type Clocks
- type ClocksEventReasons
- type Collector
- type CollectorOption
- type DefaultApplicationsClocks
- type DeferredClocks
- type DoubleBitRetirement
- type DramEncryptionMode
- type DriverModel
- type EccErrors
- type EccMode
- type EncoderStats
- type Fabric
- type FbMemoryUsage
- type FbcStats
- type GPU
- type HardwareDetector
- type HardwareInfo
- type Health
- type Ibmnpu
- type InforomBbxFlush
- type InforomVersion
- type LinkWidths
- type MaxClocks
- type MaxCustomerBoostClocks
- type MemoryPowerReadings
- type MigMode
- type ModulePowerReadings
- type MultipleSingleBitRetirement
- type NFDHardwareDetector
- type NVSMIDevice
- type OperationMode
- type Pci
- type PciBridgeChip
- type PciGpuLinkInfo
- type PcieGen
- type PlatformInfo
- type PowerProfiles
- type PowerReadings
- type RemappedRows
- type ResetStatus
- type RetiredPages
- type RowRemapperHistogram
- type SupportedClocks
- type SupportedGpuTargetTemp
- type SupportedMemClock
- type Temperature
- type Utilization
- type VirtualizationMode
- type Volatile
- type Voltage
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type Aggregate ¶
type Aggregate struct {
SramCorrectable string `xml:"sram_correctable" json:"sramCorrectable" yaml:"sramCorrectable"`
SramUncorrectableParity string `xml:"sram_uncorrectable_parity" json:"sramUncorrectableParity" yaml:"sramUncorrectableParity"`
SramUncorrectableSecded string `xml:"sram_uncorrectable_secded" json:"sramUncorrectableSecded" yaml:"sramUncorrectableSecded"`
DramCorrectable string `xml:"dram_correctable" json:"dramCorrectable" yaml:"dramCorrectable"`
DramUncorrectable string `xml:"dram_uncorrectable" json:"dramUncorrectable" yaml:"dramUncorrectable"`
SramThresholdExceeded string `xml:"sram_threshold_exceeded" json:"sramThresholdExceeded" yaml:"sramThresholdExceeded"`
}
type AggregateUncorrectableSramSources ¶
type AggregateUncorrectableSramSources struct {
SramL2 string `xml:"sram_l2" json:"sramL2" yaml:"sramL2"`
SramSm string `xml:"sram_sm" json:"sramSm" yaml:"sramSm"`
SramMicrocontroller string `xml:"sram_microcontroller" json:"sramMicrocontroller" yaml:"sramMicrocontroller"`
SramPcie string `xml:"sram_pcie" json:"sramPcie" yaml:"sramPcie"`
SramOther string `xml:"sram_other" json:"sramOther" yaml:"sramOther"`
}
type ApplicationsClocks ¶
type Bar1MemoryUsage ¶
type Capabilities ¶
type Capabilities struct {
Egm string `xml:"egm" json:"egm" yaml:"egm"`
}
type CcProtectedMemoryUsage ¶
type ClockPolicy ¶
type Clocks ¶
type Clocks struct {
GraphicsClock string `xml:"graphics_clock" json:"graphicsClock" yaml:"graphicsClock"`
SmClock string `xml:"sm_clock" json:"smClock" yaml:"smClock"`
MemClock string `xml:"mem_clock" json:"memClock" yaml:"memClock"`
VideoClock string `xml:"video_clock" json:"videoClock" yaml:"videoClock"`
}
type ClocksEventReasons ¶
type ClocksEventReasons struct {
ClocksEventReasonGpuIdle string `xml:"clocks_event_reason_gpu_idle" json:"clocksEventReasonGPUIdle" yaml:"clocksEventReasonGPUIdle"`
ClocksEventReasonApplicationsClocksSetting string `` /* 153-byte string literal not displayed */
ClocksEventReasonSwPowerCap string `xml:"clocks_event_reason_sw_power_cap" json:"clocksEventReasonSwPowerCap" yaml:"clocksEventReasonSwPowerCap"`
ClocksEventReasonHwSlowdown string `xml:"clocks_event_reason_hw_slowdown" json:"clocksEventReasonHwSlowdown" yaml:"clocksEventReasonHwSlowdown"`
ClocksEventReasonHwThermalSlowdown string `` /* 129-byte string literal not displayed */
ClocksEventReasonHwPowerBrakeSlowdown string `` /* 139-byte string literal not displayed */
ClocksEventReasonSyncBoost string `xml:"clocks_event_reason_sync_boost" json:"clocksEventReasonSyncBoost" yaml:"clocksEventReasonSyncBoost"`
ClocksEventReasonSwThermalSlowdown string `` /* 129-byte string literal not displayed */
ClocksEventReasonDisplayClocksSetting string `` /* 138-byte string literal not displayed */
}
type Collector ¶
type Collector struct {
// contains filtered or unexported fields
}
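Collector collects NVIDIA SMI configurations from nvidia-smi command output in XML format and parses them into NVSMIDevice structures. When a HardwareDetector is configured, it also runs Phase 1 hardware detection before the nvidia-smi collection.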
func NewCollector ¶ added in v0.12.0
func NewCollector(opts ...CollectorOption) *Collector
NewCollector creates a GPU collector with the given options.
func (*Collector) Collect ¶
func (s *Collector) Collect(ctx context.Context) (*measurement.Measurement, error)
Collect retrieves GPU information in two phases:
- Phase 1 (hardware): NFD-based PCI detection when hardwareDetector is set
- Phase 2 (smi): existing nvidia-smi collection (always runs)
When hardwareDetector is nil, Phase 1 is skipped, preserving pre-NFD behavior. On Phase 1 failure, the collector logs a warning and proceeds with Phase 2.
type CollectorOption ¶ added in v0.12.0
type CollectorOption func(*Collector)
CollectorOption configures a Collector.
func WithCommandRunner ¶ added in v0.12.0
func WithCommandRunner(runner commandRunner) CollectorOption
WithCommandRunner sets a custom command runner for executing external tools. Used in tests to mock nvidia-smi execution.
func WithHardwareDetector ¶ added in v0.12.0
func WithHardwareDetector(d HardwareDetector) CollectorOption
WithHardwareDetector sets the hardware detector for Phase 1 GPU detection. When not set, Phase 1 is skipped and only nvidia-smi collection runs.
type DeferredClocks ¶
type DeferredClocks struct {
MemClock string `xml:"mem_clock" json:"memClock" yaml:"memClock"`
}
type DoubleBitRetirement ¶
type DramEncryptionMode ¶
type DriverModel ¶
type EccErrors ¶
type EccErrors struct {
Volatile Volatile `xml:"volatile" json:"volatile" yaml:"volatile"`
Aggregate Aggregate `xml:"aggregate" json:"aggregate" yaml:"aggregate"`
AggregateUncorrectableSramSources AggregateUncorrectableSramSources `xml:"aggregate_uncorrectable_sram_sources" json:"aggregateUncorrectableSramSources" yaml:"aggregateUncorrectableSramSources"`
}
type EncoderStats ¶
type Fabric ¶
type Fabric struct {
State string `xml:"state" json:"state" yaml:"state"`
Status string `xml:"status" json:"status" yaml:"status"`
Cliqueid string `xml:"cliqueId" json:"cliqueId" yaml:"cliqueId"`
Clusteruuid string `xml:"clusterUuid" json:"clusterUuid" yaml:"clusterUuid"`
Health Health `xml:"health" json:"health" yaml:"health"`
}
type FbMemoryUsage ¶
type GPU ¶
type GPU struct {
ProductName string `xml:"product_name" json:"productName" yaml:"productName"`
ProductBrand string `xml:"product_brand" json:"productBrand" yaml:"productBrand"`
ProductArchitecture string `xml:"product_architecture" json:"productArchitecture" yaml:"productArchitecture"`
DisplayMode string `xml:"display_mode" json:"displayMode" yaml:"displayMode"`
DisplayActive string `xml:"display_active" json:"displayActive" yaml:"displayActive"`
PersistenceMode string `xml:"persistence_mode" json:"persistenceMode" yaml:"persistenceMode"`
AddressingMode string `xml:"addressing_mode" json:"addressingMode" yaml:"addressingMode"`
MigMode MigMode `xml:"mig_mode" json:"migMode" yaml:"migMode"`
MigDevices string `xml:"mig_devices" json:"migDevices" yaml:"migDevices"`
AccountingMode string `xml:"accounting_mode" json:"accountingMode" yaml:"accountingMode"`
AccountingModeBufferSize string `xml:"accounting_mode_buffer_size" json:"accountingModeBufferSize" yaml:"accountingModeBufferSize"`
DriverModel DriverModel `xml:"driver_model" json:"driverModel" yaml:"driverModel"`
Serial string `xml:"serial" json:"serial" yaml:"serial"`
UUID string `xml:"uuid" json:"uuid" yaml:"uuid"`
MinorNumber string `xml:"minor_number" json:"minorNumber" yaml:"minorNumber"`
VbiosVersion string `xml:"vbios_version" json:"vbiosVersion" yaml:"vbiosVersion"`
MultigpuBoard string `xml:"multigpu_board" json:"multiGPUBoard" yaml:"multiGPUBoard"`
BoardID string `xml:"board_id" json:"boardId" yaml:"boardId"`
BoardPartNumber string `xml:"board_part_number" json:"boardPartNumber" yaml:"boardPartNumber"`
GpuPartNumber string `xml:"gpu_part_number" json:"gpuPartNumber" yaml:"gpuPartNumber"`
GpuFruPartNumber string `xml:"gpu_fru_part_number" json:"gpuFRUPartNumber" yaml:"gpuFRUPartNumber"`
PlatformInfo PlatformInfo `xml:"platformInfo" json:"platformInfo" yaml:"platformInfo"`
InforomVersion InforomVersion `xml:"inforom_version" json:"inforomVersion" yaml:"inforomVersion"`
InforomBbxFlush InforomBbxFlush `xml:"inforom_bbx_flush" json:"inforomBBXFlush" yaml:"inforomBBXFlush"`
GpuOperationMode OperationMode `xml:"gpu_operation_mode" json:"gpuOperationMode" yaml:"gpuOperationMode"`
C2cMode string `xml:"c2c_mode" json:"c2cMode" yaml:"c2cMode"`
GpuVirtualizationMode VirtualizationMode `xml:"gpu_virtualization_mode" json:"gpuVirtualizationMode" yaml:"gpuVirtualizationMode"`
GpuResetStatus ResetStatus `xml:"gpu_reset_status" json:"gpuResetStatus" yaml:"gpuResetStatus"`
GpuRecoveryAction string `xml:"gpu_recovery_action" json:"gpuRecoveryAction" yaml:"gpuRecoveryAction"`
GspFirmwareVersion string `xml:"gsp_firmware_version" json:"gspFirmwareVersion" yaml:"gspFirmwareVersion"`
Ibmnpu Ibmnpu `xml:"ibmnpu" json:"ibmnpu" yaml:"ibmnpu"`
Pci Pci `xml:"pci" json:"pci" yaml:"pci"`
FanSpeed string `xml:"fan_speed" json:"fanSpeed" yaml:"fanSpeed"`
PerformanceState string `xml:"performance_state" json:"performanceState" yaml:"performanceState"`
ClocksEventReasons ClocksEventReasons `xml:"clocks_event_reasons" json:"clocksEventReasons" yaml:"clocksEventReasons"`
SparseOperationMode string `xml:"sparse_operation_mode" json:"sparseOperationMode" yaml:"sparseOperationMode"`
FbMemoryUsage FbMemoryUsage `xml:"fb_memory_usage" json:"fbMemoryUsage" yaml:"fbMemoryUsage"`
Bar1MemoryUsage Bar1MemoryUsage `xml:"bar1_memory_usage" json:"bar1MemoryUsage" yaml:"bar1MemoryUsage"`
CcProtectedMemoryUsage CcProtectedMemoryUsage `xml:"cc_protected_memory_usage" json:"ccProtectedMemoryUsage" yaml:"ccProtectedMemoryUsage"`
ComputeMode string `xml:"compute_mode" json:"computeMode" yaml:"computeMode"`
Utilization Utilization `xml:"utilization" json:"utilization" yaml:"utilization"`
EncoderStats EncoderStats `xml:"encoder_stats" json:"encoderStats" yaml:"encoderStats"`
FbcStats FbcStats `xml:"fbc_stats" json:"fbcStats" yaml:"fbcStats"`
DramEncryptionMode DramEncryptionMode `xml:"dram_encryption_mode" json:"dramEncryptionMode" yaml:"dramEncryptionMode"`
EccMode EccMode `xml:"ecc_mode" json:"eccMode" yaml:"eccMode"`
EccErrors EccErrors `xml:"ecc_errors" json:"eccErrors" yaml:"eccErrors"`
RetiredPages RetiredPages `xml:"retired_pages" json:"retiredPages" yaml:"retiredPages"`
RemappedRows RemappedRows `xml:"remapped_rows" json:"remappedRows" yaml:"remappedRows"`
Temperature Temperature `xml:"temperature" json:"temperature" yaml:"temperature"`
SupportedGpuTargetTemp SupportedGpuTargetTemp `xml:"supported_gpu_target_temp" json:"supportedGpuTargetTemp" yaml:"supportedGpuTargetTemp"`
GpuPowerReadings PowerReadings `xml:"gpu_power_readings" json:"gpuPowerReadings" yaml:"gpuPowerReadings"`
GpuMemoryPowerReadings MemoryPowerReadings `xml:"gpu_memory_power_readings" json:"gpuMemoryPowerReadings" yaml:"gpuMemoryPowerReadings"`
ModulePowerReadings ModulePowerReadings `xml:"module_power_readings" json:"modulePowerReadings" yaml:"modulePowerReadings"`
PowerSmoothing string `xml:"power_smoothing" json:"powerSmoothing" yaml:"powerSmoothing"`
PowerProfiles PowerProfiles `xml:"power_profiles" json:"powerProfiles" yaml:"powerProfiles"`
Clocks Clocks `xml:"clocks" json:"clocks" yaml:"clocks"`
ApplicationsClocks ApplicationsClocks `xml:"applications_clocks" json:"applicationsClocks" yaml:"applicationsClocks"`
DefaultApplicationsClocks DefaultApplicationsClocks `xml:"default_applications_clocks" json:"defaultApplicationsClocks" yaml:"defaultApplicationsClocks"`
DeferredClocks DeferredClocks `xml:"deferred_clocks" json:"deferredClocks" yaml:"deferredClocks"`
MaxClocks MaxClocks `xml:"max_clocks" json:"maxClocks" yaml:"maxClocks"`
MaxCustomerBoostClocks MaxCustomerBoostClocks `xml:"max_customer_boost_clocks" json:"maxCustomerBoostClocks" yaml:"maxCustomerBoostClocks"`
ClockPolicy ClockPolicy `xml:"clock_policy" json:"clockPolicy" yaml:"clockPolicy"`
Voltage Voltage `xml:"voltage" json:"voltage" yaml:"voltage"`
Fabric Fabric `xml:"fabric" json:"fabric" yaml:"fabric"`
SupportedClocks SupportedClocks `xml:"supported_clocks" json:"supportedClocks" yaml:"supportedClocks"`
Processes string `xml:"processes" json:"processes" yaml:"processes"`
AccountedProcesses string `xml:"accounted_processes" json:"accountedProcesses" yaml:"accountedProcesses"`
Capabilities Capabilities `xml:"capabilities" json:"capabilities" yaml:"capabilities"`
}
type HardwareDetector ¶ added in v0.12.0
type HardwareDetector interface {
// Detect discovers GPU hardware and driver module state.
// Returns HardwareInfo describing what was found, or an error if
// detection could not be performed (e.g., sysfs not available).
Detect(ctx context.Context) (*HardwareInfo, error)
}
HardwareDetector abstracts GPU hardware detection for testability. Implementations enumerate PCI devices and kernel module state without requiring GPU drivers to be installed.
type HardwareInfo ¶ added in v0.12.0
type HardwareInfo struct {
// GPUPresent is true if at least one NVIDIA GPU was found via PCI enumeration.
GPUPresent bool
// GPUCount is the number of NVIDIA GPUs detected via PCI enumeration.
GPUCount int
// DriverLoaded is true if the nvidia kernel module is currently loaded.
DriverLoaded bool
// DetectionSource identifies which detection method produced this result
// (e.g., "nfd", "sysfs").
DetectionSource string
}
HardwareInfo describes the GPU hardware state detected without drivers.
type Health ¶
type Health struct {
Bandwidth string `xml:"bandwidth" json:"bandwidth" yaml:"bandwidth"`
RouteRecoveryInProgress string `xml:"route_recovery_in_progress" json:"routeRecoveryInProgress" yaml:"routeRecoveryInProgress"`
RouteUnhealthy string `xml:"route_unhealthy" json:"routeUnhealthy" yaml:"routeUnhealthy"`
AccessTimeoutRecovery string `xml:"access_timeout_recovery" json:"accessTimeoutRecovery" yaml:"accessTimeoutRecovery"`
}
type Ibmnpu ¶
type Ibmnpu struct {
RelaxedOrderingMode string `xml:"relaxed_ordering_mode" json:"relaxedOrderingMode" yaml:"relaxedOrderingMode"`
}
type InforomBbxFlush ¶
type InforomVersion ¶
type InforomVersion struct {
ImgVersion string `xml:"img_version" json:"imgVersion" yaml:"imgVersion"`
OemObject string `xml:"oem_object" json:"oemObject" yaml:"oemObject"`
EccObject string `xml:"ecc_object" json:"eccObject" yaml:"eccObject"`
PwrObject string `xml:"pwr_object" json:"pwrObject" yaml:"pwrObject"`
}
type LinkWidths ¶
type MaxClocks ¶
type MaxClocks struct {
GraphicsClock string `xml:"graphics_clock" json:"graphicsClock" yaml:"graphicsClock"`
SmClock string `xml:"sm_clock" json:"smClock" yaml:"smClock"`
MemClock string `xml:"mem_clock" json:"memClock" yaml:"memClock"`
VideoClock string `xml:"video_clock" json:"videoClock" yaml:"videoClock"`
}
type MaxCustomerBoostClocks ¶
type MaxCustomerBoostClocks struct {
GraphicsClock string `xml:"graphics_clock" json:"graphicsClock" yaml:"graphicsClock"`
}
type MemoryPowerReadings ¶
type MemoryPowerReadings struct {
PowerDraw string `xml:"power_draw" json:"powerDraw" yaml:"powerDraw"`
}
type ModulePowerReadings ¶
type ModulePowerReadings struct {
PowerState string `xml:"power_state" json:"powerState" yaml:"powerState"`
PowerDraw string `xml:"power_draw" json:"powerDraw" yaml:"powerDraw"`
CurrentPowerLimit string `xml:"current_power_limit" json:"currentPowerLimit" yaml:"currentPowerLimit"`
RequestedPowerLimit string `xml:"requested_power_limit" json:"requestedPowerLimit" yaml:"requestedPowerLimit"`
DefaultPowerLimit string `xml:"default_power_limit" json:"defaultPowerLimit" yaml:"defaultPowerLimit"`
MinPowerLimit string `xml:"min_power_limit" json:"minPowerLimit" yaml:"minPowerLimit"`
MaxPowerLimit string `xml:"max_power_limit" json:"maxPowerLimit" yaml:"maxPowerLimit"`
}
type NFDHardwareDetector ¶ added in v0.12.0
type NFDHardwareDetector struct{}
NFDHardwareDetector uses NFD source packages to detect GPU hardware via PCI enumeration and kernel module state from sysfs/procfs.
NFDHardwareDetector is not safe for concurrent use. NFD source singletons are shared package-level state without synchronization. In AICR's architecture the GPU collector runs once per snapshot, so this is not a practical concern.
func (*NFDHardwareDetector) Detect ¶ added in v0.12.0
func (d *NFDHardwareDetector) Detect(ctx context.Context) (*HardwareInfo, error)
Detect discovers GPU hardware using NFD PCI and kernel sources. PCI discovery is required; kernel module detection is best-effort.
This method requires Linux with sysfs/procfs mounted. On other platforms (macOS, containers without /sys), PCI discovery will fail and an error is returned. The caller (Collector.Collect) handles this gracefully by falling back to nvidia-smi-only collection.
type NVSMIDevice ¶
type NVSMIDevice struct {
Timestamp string `xml:"timestamp" json:"timestamp" yaml:"timestamp"`
DriverVersion string `xml:"driver_version" json:"driverVersion" yaml:"driverVersion"`
CudaVersion string `xml:"cuda_version" json:"cudaVersion" yaml:"cudaVersion"`
AttachedGpus int `xml:"attached_gpus" json:"attachedGPUs" yaml:"attachedGPUs"`
GPUs []GPU `xml:"gpu" json:"gpu" yaml:"gpu"`
}
type OperationMode ¶
type Pci ¶
type Pci struct {
PciBus string `xml:"pci_bus" json:"pciBus" yaml:"pciBus"`
PciDevice string `xml:"pci_device" json:"pciDevice" yaml:"pciDevice"`
PciDomain string `xml:"pci_domain" json:"pciDomain" yaml:"pciDomain"`
PciBaseClass string `xml:"pci_base_class" json:"pciBaseClass" yaml:"pciBaseClass"`
PciSubClass string `xml:"pci_sub_class" json:"pciSubClass" yaml:"pciSubClass"`
PciDeviceID string `xml:"pci_device_id" json:"pciDeviceId" yaml:"pciDeviceId"`
PciBusID string `xml:"pci_bus_id" json:"pciBusId" yaml:"pciBusId"`
PciSubSystemID string `xml:"pci_sub_system_id" json:"pciSubSystemId" yaml:"pciSubSystemId"`
PciGpuLinkInfo PciGpuLinkInfo `xml:"pci_gpu_link_info" json:"pciGPULinkInfo" yaml:"pciGPULinkInfo"`
PciBridgeChip PciBridgeChip `xml:"pci_bridge_chip" json:"pciBridgeChip" yaml:"pciBridgeChip"`
ReplayCounter string `xml:"replay_counter" json:"replayCounter" yaml:"replayCounter"`
ReplayRolloverCounter string `xml:"replay_rollover_counter" json:"replayRolloverCounter" yaml:"replayRolloverCounter"`
TxUtil string `xml:"tx_util" json:"txUtil" yaml:"txUtil"`
RxUtil string `xml:"rx_util" json:"rxUtil" yaml:"rxUtil"`
AtomicCapsOutbound string `xml:"atomic_caps_outbound" json:"atomicCapsOutbound" yaml:"atomicCapsOutbound"`
AtomicCapsInbound string `xml:"atomic_caps_inbound" json:"atomicCapsInbound" yaml:"atomicCapsInbound"`
}
type PciBridgeChip ¶
type PciGpuLinkInfo ¶
type PciGpuLinkInfo struct {
PcieGen PcieGen `xml:"pcie_gen" json:"pcieGen" yaml:"pcieGen"`
LinkWidths LinkWidths `xml:"link_widths" json:"linkWidths" yaml:"linkWidths"`
}
type PcieGen ¶
type PcieGen struct {
MaxLinkGen string `xml:"max_link_gen" json:"maxLinkGen" yaml:"maxLinkGen"`
CurrentLinkGen string `xml:"current_link_gen" json:"currentLinkGen" yaml:"currentLinkGen"`
DeviceCurrentLinkGen string `xml:"device_current_link_gen" json:"deviceCurrentLinkGen" yaml:"deviceCurrentLinkGen"`
MaxDeviceLinkGen string `xml:"max_device_link_gen" json:"maxDeviceLinkGen" yaml:"maxDeviceLinkGen"`
MaxHostLinkGen string `xml:"max_host_link_gen" json:"maxHostLinkGen" yaml:"maxHostLinkGen"`
}
type PlatformInfo ¶
type PlatformInfo struct {
ChassisSerialNumber string `xml:"chassis_serial_number" json:"chassisSerialNumber" yaml:"chassisSerialNumber"`
SlotNumber string `xml:"slot_number" json:"slotNumber" yaml:"slotNumber"`
TrayIndex string `xml:"tray_index" json:"trayIndex" yaml:"trayIndex"`
HostID string `xml:"host_id" json:"hostId" yaml:"hostId"`
PeerType string `xml:"peer_type" json:"peerType" yaml:"peerType"`
ModuleID string `xml:"module_id" json:"moduleId" yaml:"moduleId"`
}
type PowerProfiles ¶
type PowerProfiles struct {
PowerProfileRequestedProfiles string `xml:"power_profile_requested_profiles" json:"powerProfileRequestedProfiles" yaml:"powerProfileRequestedProfiles"`
PowerProfileEnforcedProfiles string `xml:"power_profile_enforced_profiles" json:"powerProfileEnforcedProfiles" yaml:"powerProfileEnforcedProfiles"`
}
type PowerReadings ¶
type PowerReadings struct {
PowerState string `xml:"power_state" json:"powerState" yaml:"powerState"`
PowerDraw string `xml:"power_draw" json:"powerDraw" yaml:"powerDraw"`
CurrentPowerLimit string `xml:"current_power_limit" json:"currentPowerLimit" yaml:"currentPowerLimit"`
RequestedPowerLimit string `xml:"requested_power_limit" json:"requestedPowerLimit" yaml:"requestedPowerLimit"`
DefaultPowerLimit string `xml:"default_power_limit" json:"defaultPowerLimit" yaml:"defaultPowerLimit"`
MinPowerLimit string `xml:"min_power_limit" json:"minPowerLimit" yaml:"minPowerLimit"`
MaxPowerLimit string `xml:"max_power_limit" json:"maxPowerLimit" yaml:"maxPowerLimit"`
}
type RemappedRows ¶
type RemappedRows struct {
RemappedRowCorr string `xml:"remapped_row_corr" json:"remappedRowCorr" yaml:"remappedRowCorr"`
RemappedRowUnc string `xml:"remapped_row_unc" json:"remappedRowUnc" yaml:"remappedRowUnc"`
RemappedRowPending string `xml:"remapped_row_pending" json:"remappedRowPending" yaml:"remappedRowPending"`
RemappedRowFailure string `xml:"remapped_row_failure" json:"remappedRowFailure" yaml:"remappedRowFailure"`
RowRemapperHistogram RowRemapperHistogram `xml:"row_remapper_histogram" json:"rowRemapperHistogram" yaml:"rowRemapperHistogram"`
}
type ResetStatus ¶
type RetiredPages ¶
type RetiredPages struct {
MultipleSingleBitRetirement MultipleSingleBitRetirement `xml:"multiple_single_bit_retirement" json:"multipleSingleBitRetirement" yaml:"multipleSingleBitRetirement"`
DoubleBitRetirement DoubleBitRetirement `xml:"double_bit_retirement" json:"doubleBitRetirement" yaml:"doubleBitRetirement"`
PendingBlacklist string `xml:"pending_blacklist" json:"pendingBlacklist" yaml:"pendingBlacklist"`
PendingRetirement string `xml:"pending_retirement" json:"pendingRetirement" yaml:"pendingRetirement"`
}
type RowRemapperHistogram ¶
type RowRemapperHistogram struct {
RowRemapperHistogramMax string `xml:"row_remapper_histogram_max" json:"rowRemapperHistogramMax" yaml:"rowRemapperHistogramMax"`
RowRemapperHistogramHigh string `xml:"row_remapper_histogram_high" json:"rowRemapperHistogramHigh" yaml:"rowRemapperHistogramHigh"`
RowRemapperHistogramPartial string `xml:"row_remapper_histogram_partial" json:"rowRemapperHistogramPartial" yaml:"rowRemapperHistogramPartial"`
RowRemapperHistogramLow string `xml:"row_remapper_histogram_low" json:"rowRemapperHistogramLow" yaml:"rowRemapperHistogramLow"`
RowRemapperHistogramNone string `xml:"row_remapper_histogram_none" json:"rowRemapperHistogramNone" yaml:"rowRemapperHistogramNone"`
}
type SupportedClocks ¶
type SupportedClocks struct {
SupportedMemClock SupportedMemClock `xml:"supported_mem_clock" json:"supportedMemClock" yaml:"supportedMemClock"`
}
type SupportedGpuTargetTemp ¶
type SupportedMemClock ¶
type Temperature ¶
type Temperature struct {
GpuTemp string `xml:"gpu_temp" json:"gpuTemp" yaml:"gpuTemp"`
GpuTempTlimit string `xml:"gpu_temp_tlimit" json:"gpuTempTlimit" yaml:"gpuTempTlimit"`
GpuTempMaxTlimitThreshold string `xml:"gpu_temp_max_tlimit_threshold" json:"gpuTempMaxTlimitThreshold" yaml:"gpuTempMaxTlimitThreshold"`
GpuTempSlowTlimitThreshold string `xml:"gpu_temp_slow_tlimit_threshold" json:"gpuTempSlowTlimitThreshold" yaml:"gpuTempSlowTlimitThreshold"`
GpuTempMaxGpuTlimitThreshold string `xml:"gpu_temp_max_gpu_tlimit_threshold" json:"gpuTempMaxGPUTlimitThreshold" yaml:"gpuTempMaxGPUTlimitThreshold"`
GpuTargetTemperature string `xml:"gpu_target_temperature" json:"gpuTargetTemperature" yaml:"gpuTargetTemperature"`
MemoryTemp string `xml:"memory_temp" json:"memoryTemp" yaml:"memoryTemp"`
GpuTempMaxMemTlimitThreshold string `xml:"gpu_temp_max_mem_tlimit_threshold" json:"gpuTempMaxMemTlimitThreshold" yaml:"gpuTempMaxMemTlimitThreshold"`
}
type Utilization ¶
type Utilization struct {
GpuUtil string `xml:"gpu_util" json:"gpuUtil" yaml:"gpuUtil"`
MemoryUtil string `xml:"memory_util" json:"memoryUtil" yaml:"memoryUtil"`
EncoderUtil string `xml:"encoder_util" json:"encoderUtil" yaml:"encoderUtil"`
DecoderUtil string `xml:"decoder_util" json:"decoderUtil" yaml:"decoderUtil"`
JpegUtil string `xml:"jpeg_util" json:"jpegUtil" yaml:"jpegUtil"`
OfaUtil string `xml:"ofa_util" json:"ofaUtil" yaml:"ofaUtil"`
}
type VirtualizationMode ¶
type VirtualizationMode struct {
VirtualizationMode string `xml:"virtualization_mode" json:"virtualizationMode" yaml:"virtualizationMode"`
HostVgpuMode string `xml:"host_vgpu_mode" json:"hostVGPUMode" yaml:"hostVGPUMode"`
VgpuHeterogeneousMode string `xml:"vgpu_heterogeneous_mode" json:"vgpuHeterogeneousMode" yaml:"vgpuHeterogeneousMode"`
}
type Volatile ¶
type Volatile struct {
SramCorrectable string `xml:"sram_correctable" json:"sramCorrectable" yaml:"sramCorrectable"`
SramUncorrectableParity string `xml:"sram_uncorrectable_parity" json:"sramUncorrectableParity" yaml:"sramUncorrectableParity"`
SramUncorrectableSecded string `xml:"sram_uncorrectable_secded" json:"sramUncorrectableSecded" yaml:"sramUncorrectableSecded"`
DramCorrectable string `xml:"dram_correctable" json:"dramCorrectable" yaml:"dramCorrectable"`
DramUncorrectable string `xml:"dram_uncorrectable" json:"dramUncorrectable" yaml:"dramUncorrectable"`
}