Documentation
¶
Overview ¶
Package config provides the gpud configuration data for the server.
Index ¶
Constants ¶
View Source
// Default values exported by the config package.
const (
	// DefaultAPIVersion is the default API version string.
	DefaultAPIVersion = "v1"
	// DefaultGPUdPort is the default gpud port number.
	DefaultGPUdPort = 15132
)
Variables ¶
View Source
var ( DefaultRefreshPeriod = metav1.Duration{Duration: time.Minute} // keep the metrics only for the last 3 hours DefaultRetentionPeriod = metav1.Duration{Duration: 3 * time.Hour} // compact/vacuum is disruptive to existing queries (including reads) // but necessary to keep the state database from growing indefinitely // TODO: disabled for now, until we have a better way to detect the performance issue DefaultCompactPeriod = metav1.Duration{Duration: 0} )
View Source
// ErrInvalidAutoUpdateExitCode is the sentinel error for an
// auto_update_exit_code setting that is given while auto update is not
// enabled. Compare against it with errors.Is.
var ErrInvalidAutoUpdateExitCode = errors.New(
	"auto_update_exit_code is only valid when auto_update is enabled",
)
Functions ¶
func DefaultConfigFile ¶
func DefaultFifoFile ¶
func DefaultStateFile ¶
Types ¶
type Config ¶
// Config provides the gpud configuration data for the server.
type Config struct {
	// API version string, stored under the "api_version" JSON key.
	APIVersion string `json:"api_version"`

	// Basic annotations describing the server, such as machine id or
	// host name. Omitted from JSON when empty.
	Annotations map[string]string `json:"annotations,omitempty"`

	// Address the server listens on.
	Address string `json:"address"`

	// Per-component configurations, keyed by string.
	// Omitted from JSON when empty.
	Components map[string]any `json:"components,omitempty"`

	// State file where the latest status is persisted.
	// When left empty, the states are not persisted to a file.
	State string `json:"state"`

	// How long states/metrics are retained. After this period has
	// elapsed, old states/metrics are purged/compacted.
	RetentionPeriod metav1.Duration `json:"retention_period"`

	// Interval between compactions of the state database.
	CompactPeriod metav1.Duration `json:"compact_period"`

	// Enables the profiler when true.
	Pprof bool `json:"pprof"`

	// Settings for the local web interface. Omitted from JSON when nil.
	Web *Web `json:"web,omitempty"`

	// Overrides for the tool binaries, intended for testing.
	ToolOverwriteOptions ToolOverwriteOptions `json:"tool_overwrite_options"`

	// Auto update switch; set to false to disable auto update.
	EnableAutoUpdate bool `json:"enable_auto_update"`

	// Exit code used when exiting for an auto update.
	// Only valid while auto update is enabled.
	// A value of -1 disables the auto update by exit code.
	AutoUpdateExitCode int `json:"auto_update_exit_code"`

	// Controls handling of docker connection errors.
	// NOTE(review): presumably true means docker connection errors are
	// ignored — confirm against the component that reads this flag.
	DockerIgnoreConnectionErrors bool `json:"docker_ignore_connection_errors"`

	// Controls handling of kubelet connection errors.
	// NOTE(review): presumably true means kubelet connection errors are
	// ignored — confirm against the component that reads this flag.
	KubeletIgnoreConnectionErrors bool `json:"kubelet_ignore_connection_errors"`

	// Files whose existence is to be checked.
	FilesToCheck []string `json:"files_to_check"`

	// Kernel modules whose existence is to be checked.
	KernelModulesToCheck []string `json:"kernel_modules_to_check"`

	// Nvidia tool command paths that replace the default paths.
	NvidiaToolOverwrites nvidia_common.ToolOverwrites `json:"nvidia_tool_overwrites"`
}
Config provides the gpud configuration data for the server.
type Op ¶
// Op holds the optional settings that an OpOption receives by pointer.
// NOTE(review): the fields appear to mirror the same-named Config
// fields — confirm against the code that consumes Op.
type Op struct {
	// Files whose existence is to be checked.
	FilesToCheck []string

	// Kernel modules whose existence is to be checked.
	KernelModulesToCheck []string

	// Docker connection error handling flag.
	DockerIgnoreConnectionErrors bool

	// Kubelet connection error handling flag.
	KubeletIgnoreConnectionErrors bool

	// Embedded nvidia tool overwrites; its fields are promoted onto Op.
	nvidia_common.ToolOverwrites
}
type OpOption ¶
// OpOption is a function that is handed a pointer to an Op.
// NOTE(review): presumably used as a functional option that sets fields
// on the Op (e.g. by WithFilesToCheck or WithIbstatCommand) — confirm.
type OpOption func(op *Op)
func WithFilesToCheck ¶
func WithIbstatCommand ¶
Specifies the ibstat binary path to overwrite the default path.
type ToolOverwriteOptions ¶
// ToolOverwriteOptions lists tool binaries whose default paths are
// replaced. Config describes these overrides as intended for testing.
type ToolOverwriteOptions struct {
	// Path of the ibstat binary, stored under the "ibstat_command" JSON key.
	IbstatCommand string `json:"ibstat_command"`
}
type Web ¶
// Web holds the settings for the local web interface.
type Web struct {
	// Turns the web interface on.
	Enable bool `json:"enable"`

	// Turns the admin interface on.
	Admin bool `json:"admin"`

	// Time period at which metrics are refreshed.
	RefreshPeriod metav1.Duration `json:"refresh_period"`

	// Time period from which metrics start being displayed.
	SincePeriod metav1.Duration `json:"since_period"`
}
Web holds the settings for the local web interface.
Click to show internal directories.
Click to hide internal directories.