Documentation
¶
Overview ¶
Package config provides the gpud configuration data for the server.
Index ¶
- Constants
- Variables
- func DefaultConfigFile() (string, error)
- func DefaultContainerdComponent(ctx context.Context) (any, bool)
- func DefaultDmesgComponent(ctx context.Context) (any, bool, error)
- func DefaultDockerContainerComponent(ctx context.Context, ignoreConnectionErrors bool) (any, bool)
- func DefaultFifoFile() (string, error)
- func DefaultK8sPodComponent(ctx context.Context, ignoreConnectionErrors bool) (any, bool)
- func DefaultStateFile() (string, error)
- type Config
- type Op
- type OpOption
- type Web
Constants ¶
View Source
// Default values for the gpud server configuration.
const (
	// DefaultAPIVersion is the default API version string.
	// NOTE(review): presumably the default for Config.APIVersion — confirm against the loader.
	DefaultAPIVersion = "v1"
	// DefaultGPUdPort is the default gpud port number.
	// NOTE(review): presumably the port the server listens on — confirm against the caller.
	DefaultGPUdPort = 15132
)
Variables ¶
View Source
var ( DefaultRefreshPeriod = metav1.Duration{Duration: time.Minute} DefaultRetentionPeriod = metav1.Duration{Duration: 30 * time.Minute} DefaultRefreshComponentsInterval = metav1.Duration{Duration: time.Minute} )
View Source
// Default NVIDIA shared-library names and the directories listed for locating them.
var (
	// DefaultNVIDIALibraries lists the default NVIDIA shared-library file names.
	DefaultNVIDIALibraries = []string{
		"libnvidia-ml.so",
		"libcuda.so",
	}

	// DefaultNVIDIALibrariesSearchDirs lists the default directories, in order,
	// covering both x86_64 and aarch64 library layouts.
	// NOTE(review): presumably these are searched for DefaultNVIDIALibraries;
	// whether "/" is searched recursively is not visible here — confirm against the caller.
	DefaultNVIDIALibrariesSearchDirs = []string{
		"/",
		"/usr/lib64",
		"/usr/lib/x86_64-linux-gnu",
		"/usr/lib/aarch64-linux-gnu",
		"/usr/lib/x86_64-linux-gnu/nvidia/current",
		"/usr/lib/aarch64-linux-gnu/nvidia/current",
		"/lib64",
		"/lib/x86_64-linux-gnu",
		"/lib/aarch64-linux-gnu",
		"/lib/x86_64-linux-gnu/nvidia/current",
		"/lib/aarch64-linux-gnu/nvidia/current",
	}
)
View Source
// ErrInvalidAutoUpdateExitCode is the sentinel error stating that
// auto_update_exit_code is only valid when auto update is enabled.
// NOTE(review): presumably returned by config validation when
// Config.AutoUpdateExitCode is set while Config.EnableAutoUpdate is false — confirm.
var ErrInvalidAutoUpdateExitCode = errors.New("auto_update_exit_code is only valid when auto_update is enabled")
Functions ¶
func DefaultConfigFile ¶
func DefaultContainerdComponent ¶ added in v0.0.4
func DefaultDmesgComponent ¶ added in v0.0.4
func DefaultDockerContainerComponent ¶ added in v0.0.4
func DefaultFifoFile ¶
func DefaultK8sPodComponent ¶ added in v0.0.4
func DefaultStateFile ¶
Types ¶
type Config ¶
// Config provides gpud configuration data for the server.
// Every field carries a JSON tag naming its serialized key.
type Config struct {
// API version, serialized as "api_version".
// NOTE(review): presumably expected to equal DefaultAPIVersion ("v1") — confirm.
APIVersion string `json:"api_version"`
// Basic server annotations (e.g., machine id, host name, etc.).
// Omitted from JSON when empty.
Annotations map[string]string `json:"annotations,omitempty"`
// Address for the server to listen on.
Address string `json:"address"`
// Component-specific configurations; values are untyped (any).
// NOTE(review): presumably keyed by component name — confirm.
// Omitted from JSON when empty.
Components map[string]any `json:"components,omitempty"`
// State file that persists the latest status.
// If empty, the states are not persisted to file.
State string `json:"state"`
// Amount of time to retain states/metrics for.
// Once elapsed, old states/metrics are purged/compacted.
RetentionPeriod metav1.Duration `json:"retention_period"`
// Interval at which to refresh selected components.
// Refresh is disabled if this is not set.
RefreshComponentsInterval metav1.Duration `json:"refresh_components_interval"`
// Set true to enable the profiler.
Pprof bool `json:"pprof"`
// Settings for the local web interface.
// Optional pointer; omitted from JSON when nil.
Web *Web `json:"web,omitempty"`
// Set false to disable auto update.
EnableAutoUpdate bool `json:"enable_auto_update"`
// Exit code to exit with when auto updating.
// Only valid when auto update is enabled.
// Set -1 to disable the auto update by exit code.
// NOTE(review): the ErrInvalidAutoUpdateExitCode message refers to "auto_update",
// while the key above is "enable_auto_update" — confirm which name users see.
AutoUpdateExitCode int `json:"auto_update_exit_code"`
}
Config provides gpud configuration data for the server.
func LoadConfigYAML ¶
func ParseConfigYAML ¶
type Op ¶ added in v0.0.4
// Op holds the option values that OpOption functions operate on
// (OpOption is declared as func(*Op)).
// NOTE(review): the field meanings below are inferred from the field names and the
// matching With* option names listed for this package — confirm against the callers.
type Op struct {
// Presumably file paths to check — confirm.
FilesToCheck []string
// Presumably kernel module names to check — confirm.
KernelModulesToCheck []string
// Optional pointer to the expected InfiniBand port states; may be nil.
ExpectedPortStates *infiniband.ExpectedPortStates
// Presumably tells the Docker container component to ignore connection errors — confirm.
DockerIgnoreConnectionErrors bool
// Presumably tells the kubelet/k8s pod component to ignore connection errors — confirm.
KubeletIgnoreConnectionErrors bool
}
type OpOption ¶ added in v0.0.4
// OpOption is a function that receives a pointer to an Op.
// NOTE(review): presumably the functional-option type returned by the With*
// constructors, each setting one field on the Op — confirm.
type OpOption func(*Op)
func WithDockerIgnoreConnectionErrors ¶ added in v0.0.5
func WithExpectedPortStates ¶ added in v0.2.0
func WithExpectedPortStates(exp infiniband.ExpectedPortStates) OpOption
func WithFilesToCheck ¶ added in v0.0.4
func WithKernelModulesToCheck ¶ added in v0.2.0
func WithKubeletIgnoreConnectionErrors ¶ added in v0.0.5
type Web ¶
// Web holds the settings for the local web interface.
type Web struct {
// Enable the web interface.
Enable bool `json:"enable"`
// Enable the admin interface.
Admin bool `json:"admin"`
// RefreshPeriod is the time period to refresh metrics.
RefreshPeriod metav1.Duration `json:"refresh_period"`
// SincePeriod is the time period to start displaying metrics from.
// NOTE(review): presumably a look-back window relative to now — confirm.
SincePeriod metav1.Duration `json:"since_period"`
}
Web holds the settings for the local web interface.
Click to show internal directories.
Click to hide internal directories.