gpu

package
v0.9.242 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: May 2, 2025 License: AGPL-3.0 Imports: 29 Imported by: 0

Documentation

Index

Constants

View Source
const (
	// CONTROLLER_HOST is the host name used for the GPU controller: the local
	// machine ("localhost").
	// NOTE(review): presumably the address the controller is reached on — confirm at the usage sites.
	CONTROLLER_HOST = "localhost"

	// CONTROLLER_PREMATURE_EXIT_SIGNAL is the signal sent to a job when the GPU
	// controller exits prematurely. The intercepted job is guaranteed to exit
	// upon receiving this signal, and prints a message to stderr about the GPU
	// controller's failure.
	CONTROLLER_PREMATURE_EXIT_SIGNAL = syscall.SIGUSR1

	// Settings for the GPU controller log file.
	//
	// CONTROLLER_LOG_FILE_FORMATTER is a format string for the log file name
	// with a single %s verb.
	// NOTE(review): the value substituted for %s is not visible here — presumably a job or controller ID; confirm.
	//
	// CONTROLLER_LOG_FILE_MODE is the set of open flags: create the file if it
	// does not exist, open it write-only, and append on every write.
	//
	// CONTROLLER_LOG_FILE_PERMS is the permission mode 0o644 (owner read/write,
	// group and others read-only).
	CONTROLLER_LOG_FILE_FORMATTER = "cedana-gpu-controller-%s.log"
	CONTROLLER_LOG_FILE_MODE      = os.O_CREATE | os.O_WRONLY | os.O_APPEND
	CONTROLLER_LOG_FILE_PERMS     = 0o644
)
View Source
const (
	// Durations named after the freeze, unfreeze, dump, restore and health
	// operations: 20 seconds for freeze/unfreeze, 5 minutes for dump/restore,
	// and 30 seconds for health.
	// NOTE(review): presumably upper bounds on the corresponding GPU controller
	// requests — where each is applied is not visible here; confirm.
	FREEZE_TIMEOUT   = 20 * time.Second
	UNFREEZE_TIMEOUT = 20 * time.Second
	DUMP_TIMEOUT     = 5 * time.Minute
	RESTORE_TIMEOUT  = 5 * time.Minute
	HEALTH_TIMEOUT   = 30 * time.Second
)

Variables

This section is empty.

Functions

func Attach

func Attach(gpus Manager) types.Adapter[types.Run]

Adapter that adds GPU support to the request. GPU Dump/Restore is automatically managed by the job manager using CRIU callbacks. Assumes the job is already created (not running).

func Interception

func Interception(next types.Run) types.Run

Adapter that adds GPU interception to the request based on the job type. Each plugin must implement its own support for GPU interception.

func ProcessInterception

func ProcessInterception(next types.Run) types.Run

Adapter that adds GPU interception to a process job.

Types

type Manager

// Manager is the set of operations for attaching a GPU controller to a job,
// detaching it, querying attachment state, exposing health checks, and
// obtaining the CRIU notify callback used for GPU checkpoint/restore.
type Manager interface {
	// Attach attaches a GPU controller to a process with the given JID and PID.
	// Takes in a channel for the PID, allowing this to be called before the process is started,
	// so that the PID can be passed in later.
	Attach(ctx context.Context, lifetime context.Context, jid string, user *syscall.Credential, pid <-chan uint32, env []string) error

	// AttachAsync calls Attach in the background.
	// Returns a channel that will receive an error if the attach fails.
	AttachAsync(ctx context.Context, lifetime context.Context, jid string, user *syscall.Credential, pid <-chan uint32, env []string) <-chan error

	// IsAttached returns true if a GPU is attached for the given JID.
	IsAttached(jid string) bool

	// Detach detaches the GPU controller from the process with the given JID.
	Detach(jid string) error

	// Checks returns server-compatible health checks.
	Checks() types.Checks

	// CRIUCallback returns the CRIU notify callback for GPU C/R
	// (checkpoint/restore) of the job with the given JID.
	CRIUCallback(lifetime context.Context, jid string, user *syscall.Credential, stream int32, env ...string) *criu.NotifyCallback
}

type ManagerMissing

// ManagerMissing is an empty struct that declares every method of the Manager
// interface (Attach, AttachAsync, IsAttached, Detach, Checks, CRIUCallback).
// NOTE(review): the method bodies are not visible here — presumably they
// report that GPU support is missing; confirm.
type ManagerMissing struct{}

Embed this into implementations that are not (yet) fully implemented.

func (ManagerMissing) Attach

func (ManagerMissing) Attach(ctx context.Context, lifetime context.Context, jid string, user *syscall.Credential, pid <-chan uint32, env []string) error

func (ManagerMissing) AttachAsync

func (ManagerMissing) AttachAsync(ctx context.Context, lifetime context.Context, jid string, user *syscall.Credential, pid <-chan uint32, env []string) <-chan error

func (ManagerMissing) CRIUCallback

func (ManagerMissing) CRIUCallback(lifetime context.Context, jid string, user *syscall.Credential, stream int32, env ...string) *criu.NotifyCallback

func (ManagerMissing) Checks

func (ManagerMissing) Checks() types.Checks

func (ManagerMissing) Detach

func (ManagerMissing) Detach(jid string) error

func (ManagerMissing) IsAttached

func (ManagerMissing) IsAttached(jid string) bool

type ManagerSimple

// ManagerSimple is a GPU manager type whose pointer receiver declares the
// Manager methods (Attach, AttachAsync, IsAttached, Detach, Checks,
// CRIUCallback). Construct it with NewSimpleManager.
type ManagerSimple struct {
	// contains filtered or unexported fields
}

func NewSimpleManager

func NewSimpleManager(serverWg *sync.WaitGroup, plugins plugins.Manager) *ManagerSimple

func (*ManagerSimple) Attach

func (m *ManagerSimple) Attach(ctx context.Context, lifetime context.Context, jid string, user *syscall.Credential, pid <-chan uint32, env []string) error

func (*ManagerSimple) AttachAsync

func (m *ManagerSimple) AttachAsync(ctx context.Context, lifetime context.Context, jid string, user *syscall.Credential, pid <-chan uint32, env []string) <-chan error

func (*ManagerSimple) CRIUCallback

func (m *ManagerSimple) CRIUCallback(lifetime context.Context, jid string, user *syscall.Credential, stream int32, env ...string) *criu_client.NotifyCallback

func (*ManagerSimple) Checks

func (m *ManagerSimple) Checks() types.Checks

func (*ManagerSimple) Detach

func (m *ManagerSimple) Detach(jid string) error

func (*ManagerSimple) IsAttached

func (m *ManagerSimple) IsAttached(jid string) bool

type PoolManager

// PoolManager is a GPU manager type that embeds ManagerMissing, so the
// ManagerMissing methods are promoted to it; no methods of its own are
// declared here. Construct it with NewPoolManager.
type PoolManager struct {
	ManagerMissing
}

func NewPoolManager

func NewPoolManager(ctx context.Context, wg *sync.WaitGroup, size int) *PoolManager

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL