session

package
v0.7.0-alpha.0 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Aug 10, 2025 License: Apache-2.0 Imports: 38 Imported by: 0

Documentation

Index

Constants

View Source
const (
	// DefaultQuerySince is a duration of 30 minutes.
	// NOTE(review): presumably the look-back window applied when a query
	// does not specify its own "since" value — confirm against the caller.
	DefaultQuerySince = 30 * time.Minute
)

Variables

View Source
// ErrAutoUpdateDisabledButExitCodeSet reports a contradictory configuration:
// auto update is disabled, yet an auto-update exit code has been set.
// NOTE(review): presumably returned while validating the session options —
// confirm where it is raised.
var ErrAutoUpdateDisabledButExitCodeSet = errors.New("auto update is disabled but auto update by exit code is set")

Functions

This section is empty.

Types

type Body

// Body pairs a raw byte payload (Data) with a request ID (ReqID).
// Both fields are omitted from the JSON encoding when empty.
// NOTE(review): presumably the envelope exchanged over the session, with
// ReqID correlating a response to its request — confirm against the
// reader/writer code.
type Body struct {
	Data  []byte `json:"data,omitempty"`
	ReqID string `json:"req_id,omitempty"`
}

type BootstrapRequest added in v0.5.0

// BootstrapRequest describes a bootstrap script to run: the script itself,
// base64 encoded, and the timeout to apply to it.
type BootstrapRequest struct {
	// TimeoutInSeconds is the timeout for the bootstrap script.
	// If not set, the default timeout is 10 seconds.
	// A zero value is omitted from the JSON encoding (omitempty).
	TimeoutInSeconds int `json:"timeout_in_seconds,omitempty"`

	// ScriptBase64 is the base64 encoded script to run.
	ScriptBase64 string `json:"script_base64,omitempty"`
}

type BootstrapResponse added in v0.5.0

// BootstrapResponse carries an output string and an exit code.
// NOTE(review): presumably the result of running the script from a
// BootstrapRequest — confirm against the session processor.
//
// Because both fields use omitempty, an empty Output and an ExitCode of 0
// are omitted from the JSON encoding; a missing "exit_code" key therefore
// decodes as 0.
type BootstrapResponse struct {
	Output   string `json:"output,omitempty"`
	ExitCode int32  `json:"exit_code,omitempty"`
}

type Op

// Op is the value that OpOption functions receive a pointer to.
// All of its fields are unexported, so code outside this package can only
// influence it through the With* option constructors.
type Op struct {
	// contains filtered or unexported fields
}

type OpOption

// OpOption is a function that operates on an *Op. The With* constructors in
// this package return OpOption values, and NewSession accepts any number of
// them as its trailing arguments.
type OpOption func(*Op)

func WithAuditLogger added in v0.5.0

func WithAuditLogger(auditLogger log.AuditLogger) OpOption

func WithAutoUpdateExitCode

func WithAutoUpdateExitCode(autoUpdateExitCode int) OpOption

WithAutoUpdateExitCode triggers an auto update of GPUd itself by exiting the process with the given exit code. It is useful when the machine is managed by a Kubernetes DaemonSet and an auto update should be triggered when the DaemonSet restarts the machine.

func WithComponentsRegistry added in v0.5.0

func WithComponentsRegistry(componentsRegistry components.Registry) OpOption

func WithEnableAutoUpdate

func WithEnableAutoUpdate(enableAutoUpdate bool) OpOption

func WithFaultInjector added in v0.5.0

func WithFaultInjector(faultInjector pkgfaultinjector.Injector) OpOption

func WithMachineID

func WithMachineID(machineID string) OpOption

func WithMetricsStore added in v0.5.0

func WithMetricsStore(metricsStore pkgmetrics.Store) OpOption

func WithNvidiaInstance added in v0.5.0

func WithNvidiaInstance(nvmlInstance nvidianvml.Instance) OpOption

func WithPipeInterval

func WithPipeInterval(t time.Duration) OpOption

func WithSavePluginSpecsFunc added in v0.5.0

func WithSavePluginSpecsFunc(savePluginSpecsFunc func(context.Context, pkgcustomplugins.Specs) (bool, error)) OpOption

type Request

type Request struct {
	// Method names the requested operation.
	// NOTE(review): which of the remaining fields are consulted presumably
	// depends on Method — confirm against the session processor.
	// StartTime, EndTime and Since have no omitempty tag, so they are always
	// present in the JSON encoding.
	Method        string            `json:"method,omitempty"`
	Components    []string          `json:"components,omitempty"`
	StartTime     time.Time         `json:"start_time"`
	EndTime       time.Time         `json:"end_time"`
	Since         time.Duration     `json:"since"`
	UpdateVersion string            `json:"update_version,omitempty"`
	UpdateConfig  map[string]string `json:"update_config,omitempty"`

	// Bootstrap and InjectFaultRequest are optional payloads; a nil pointer
	// is omitted from the JSON encoding.
	Bootstrap          *BootstrapRequest         `json:"bootstrap,omitempty"`
	InjectFaultRequest *pkgfaultinjector.Request `json:"inject_fault_request,omitempty"`

	// ComponentName is the name of the component to query or deregister.
	ComponentName string `json:"component_name,omitempty"`

	// TagName is the tag used to select which component checks to trigger.
	// Optional. If set, all component checks that match this tag value
	// are triggered.
	TagName string `json:"tag_name,omitempty"`

	// CustomPluginSpecs is the specs for the custom plugins to register or overwrite.
	CustomPluginSpecs pkgcustomplugins.Specs `json:"custom_plugin_specs,omitempty"`
}

Request is the request from the control plane to GPUd.

type Response

type Response struct {
	// Error is the error message from the session processor.
	// It is a string rather than the "error" type because "error"
	// does not marshal/unmarshal well.
	Error string `json:"error,omitempty"`
	// ErrorCode is the error code from the session processor.
	// It uses the same semantics as the HTTP status code.
	// See: https://www.iana.org/assignments/http-status-codes/http-status-codes.xhtml
	// A zero value is omitted from the JSON encoding (omitempty).
	ErrorCode int32 `json:"error_code,omitempty"`

	// GossipRequest is optional; a nil pointer is omitted from the JSON encoding.
	GossipRequest *apiv1.GossipRequest `json:"gossip_request,omitempty"`

	// States, Events and Metrics hold the component health states, events
	// and metrics included in this response, as typed by the apiv1 package.
	States  apiv1.GPUdComponentHealthStates `json:"states,omitempty"`
	Events  apiv1.GPUdComponentEvents       `json:"events,omitempty"`
	Metrics apiv1.GPUdComponentMetrics      `json:"metrics,omitempty"`

	// Bootstrap is optional; a nil pointer is omitted from the JSON encoding.
	// NOTE(review): presumably set only in reply to a bootstrap request — confirm.
	Bootstrap *BootstrapResponse `json:"bootstrap,omitempty"`

	// PackageStatus lists package statuses as typed by the apiv1 package.
	PackageStatus []apiv1.PackageStatus `json:"package_status,omitempty"`

	// CustomPluginSpecs lists the specs for the custom plugins.
	CustomPluginSpecs pkgcustomplugins.Specs `json:"custom_plugin_specs,omitempty"`
}

Response is the response from GPUd to the control plane.

type Session

// Session is the handle returned by NewSession; call its Stop method to stop it.
// All of its fields are unexported.
// NOTE(review): presumably it maintains the connection between the local
// GPUd server and the control plane, given NewSession's endpoint and token
// parameters — confirm against the implementation.
type Session struct {
	// contains filtered or unexported fields
}

func NewSession

func NewSession(ctx context.Context, epLocalGPUdServer string, epControlPlane string, token string, opts ...OpOption) (*Session, error)

func (*Session) Stop

func (s *Session) Stop()

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL