cmd

package
v0.0.0-...-52ffa18 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 9, 2026 License: Apache-2.0 Imports: 33 Imported by: 0

Documentation

Index

Constants

View Source
// Single-letter keys naming the kind of entity to monitor.
// NOTE(review): presumably these are used as prefixes in the device-selection
// option values — confirm against the option parser.
const (
	FlexKey  = "f" // Monitor all GPUs if MIG is disabled or all GPU instances if MIG is enabled
	MajorKey = "g" // Monitor top-level entities: GPUs or NvSwitches or CPUs
	MinorKey = "i" // Monitor sub-level entities: GPU instances/NvLinks/CPUCores - GPUI cannot be specified if MIG is disabled

)
View Source
// String names for the command's options, one constant per option.
// NOTE(review): presumably these are the flag names registered on the cli.App
// returned by NewApp — verify against the flag definitions.
const (
	CLIFieldsFile                       = "collectors"
	CLIAddress                          = "address"
	CLICollectInterval                  = "collect-interval"
	CLIKubernetes                       = "kubernetes"
	CLIKubernetesEnablePodLabels        = "kubernetes-enable-pod-labels"
	CLIKubernetesEnablePodUID           = "kubernetes-enable-pod-uid"
	CLIKubernetesGPUIDType              = "kubernetes-gpu-id-type"
	CLIKubernetesPodLabelAllowlistRegex = "kubernetes-pod-label-allowlist-regex"
	CLIUseOldNamespace                  = "use-old-namespace"
	CLIRemoteHEInfo                     = "remote-hostengine-info"
	CLIGPUDevices                       = "devices"
	CLISwitchDevices                    = "switch-devices"
	CLICPUDevices                       = "cpu-devices"
	CLINoHostname                       = "no-hostname"
	CLIUseFakeGPUs                      = "fake-gpus"
	CLIConfigMapData                    = "configmap-data"
	CLIWebSystemdSocket                 = "web-systemd-socket"
	CLIWebConfigFile                    = "web-config-file"
	CLIXIDCountWindowSize               = "xid-count-window-size"
	CLIReplaceBlanksInModelName         = "replace-blanks-in-model-name"
	CLIDebugMode                        = "debug"
	CLIClockEventsCountWindowSize       = "clock-events-count-window-size"
	CLIEnableDCGMLog                    = "enable-dcgm-log"
	CLIDCGMLogLevel                     = "dcgm-log-level"
	CLILogFormat                        = "log-format"
	CLIPodResourcesKubeletSocket        = "pod-resources-kubelet-socket"
	CLIHPCJobMappingDir                 = "hpc-job-mapping-dir"
	CLINvidiaResourceNames              = "nvidia-resource-names"
	CLIKubernetesVirtualGPUs            = "kubernetes-virtual-gpus"
	CLIDumpEnabled                      = "dump-enabled"
	CLIDumpDirectory                    = "dump-directory"
	CLIDumpRetention                    = "dump-retention"
	CLIDumpCompression                  = "dump-compression"
	CLIKubernetesEnableDRA              = "kubernetes-enable-dra"
	CLIDisableStartupValidate           = "disable-startup-validate"
	CLIEnableGPUBindUnbindWatch         = "enable-gpu-bind-unbind-watch"
	CLIGPUBindUnbindPollInterval        = "gpu-bind-unbind-poll-interval"
)
View Source
// DCGM library debug level names, one string constant per level.
// NOTE(review): presumably these are the values accepted by the
// "dcgm-log-level" option — confirm against the flag definition.
const (
	DCGMDbgLvlNone  = "NONE"
	DCGMDbgLvlFatal = "FATAL"
	DCGMDbgLvlError = "ERROR"
	DCGMDbgLvlWarn  = "WARN"
	DCGMDbgLvlInfo  = "INFO"
	DCGMDbgLvlDebug = "DEBUG"
	DCGMDbgLvlVerb  = "VERB"
)

DCGMDbgLvl is a DCGM library debug level.

Variables

Functions

func NewApp

func NewApp(buildVersion ...string) *cli.App

func StartDCGMExporterWithSignalSource

func StartDCGMExporterWithSignalSource(c *cli.Context, sigSource SignalSource) error

StartDCGMExporterWithSignalSource starts the exporter with a custom signal source. This variant allows dependency injection for testing.

Types

type OSSignalSource

type OSSignalSource struct {
	// contains filtered or unexported fields
}

OSSignalSource watches real OS signals; it is intended for production use.

func NewOSSignalSource

func NewOSSignalSource(sigs ...os.Signal) *OSSignalSource

NewOSSignalSource creates a signal source that watches OS signals

func (*OSSignalSource) Cleanup

func (s *OSSignalSource) Cleanup()

Cleanup stops watching OS signals and closes the channel

func (*OSSignalSource) Signals

func (s *OSSignalSource) Signals() <-chan os.Signal

Signals returns the channel that receives OS signals

type SignalSource

// SignalSource provides signals that trigger reload or shutdown.
// Abstracting the source behind an interface allows dependency injection
// for testing.
type SignalSource interface {
	// Signals returns the channel that receives signals.
	Signals() <-chan os.Signal
	// Cleanup stops signal watching and cleans up resources.
	Cleanup()
}

SignalSource provides signals that trigger reload or shutdown. This interface allows dependency injection for testing.

type TestSignalSource

type TestSignalSource struct {
	// contains filtered or unexported fields
}

TestSignalSource allows programmatic signal injection for testing.

func NewTestSignalSource

func NewTestSignalSource() *TestSignalSource

NewTestSignalSource creates a signal source for testing

func (*TestSignalSource) Cleanup

func (s *TestSignalSource) Cleanup()

Cleanup closes the signal channel

func (*TestSignalSource) SendSignal

func (s *TestSignalSource) SendSignal(sig os.Signal)

SendSignal injects a signal into the channel. It is a test helper.

func (*TestSignalSource) Signals

func (s *TestSignalSource) Signals() <-chan os.Signal

Signals returns the channel that receives test signals

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL