Documentation
¶
Overview ¶
Package ecc tracks NVIDIA per-GPU ECC errors and other ECC-related information.
Index ¶
Constants ¶
View Source
const ( StateNameECC = "ecc" StateKeyECCData = "data" StateKeyECCEncoding = "encoding" StateValueECCEncodingJSON = "json" )
View Source
const Name = "accelerator-nvidia-ecc"
Variables ¶
This section is empty.
Functions ¶
Types ¶
type Config ¶
type Config struct {
Query query_config.Config `json:"query"`
}
type Output ¶
type Output struct {
ECCModes []nvidia_query_nvml.ECCMode `json:"ecc_modes"`
ErrorCountsSMI []nvidia_query.SMIECCErrors `json:"error_counts_smi"`
ErrorCountsNVML []nvidia_query_nvml.ECCErrors `json:"error_counts_nvml"`
// Volatile counts are reset each time the driver loads.
// Because aggregate counts persist across reboots (i.e., for the lifetime of the device),
// we do not track them separately.
// ref. https://docs.nvidia.com/deploy/nvml-api/group__nvmlDeviceEnumvs.html#group__nvmlDeviceEnumvs_1g08978d1c4fb52b6a4c72b39de144f1d9
//
// A memory error that was not corrected.
// For ECC errors, these are double bit errors.
// For Texture memory, these are errors where the resend fails.
// ref. https://docs.nvidia.com/deploy/nvml-api/group__nvmlDeviceEnumvs.html#group__nvmlDeviceEnumvs_1gc5469bd68b9fdcf78734471d86becb24
VolatileUncorrectedErrorsFromSMI []string `json:"volatile_uncorrected_errors_from_smi"`
VolatileUncorrectedErrorsFromNVML []string `json:"volatile_uncorrected_errors_from_nvml"`
}
func ParseOutputJSON ¶
func ParseStatesToOutput ¶
func ParseStatesToOutput(states ...components.State) (*Output, error)
func ToOutput ¶
func ToOutput(i *nvidia_query.Output) *Output
ToOutput converts nvidia_query.Output to Output. It returns an empty, non-nil Output if the input or a required field (e.g., i.SMI) is nil.
Click to show internal directories.
Click to hide internal directories.