gpu

package
v0.9.244 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: May 28, 2025 License: AGPL-3.0 Imports: 34 Imported by: 0

Documentation

Index

Constants

View Source
// Settings for the GPU controller's socket address, termination signal and log file.
const (
	// Format string for the controller's unix socket address under /tmp.
	// The %s verb is filled in by the caller (presumably with the controller ID — TODO confirm).
	CONTROLLER_ADDRESS_FORMATTER  = "unix:///tmp/cedana-gpu-controller-%s.sock"
	// SIGTERM; going by the name, the signal sent to stop a controller (usage is not visible here).
	CONTROLLER_TERMINATE_SIGNAL   = syscall.SIGTERM
	// Format string for the controller's log file name.
	// The %s verb is filled in by the caller (presumably with the controller ID — TODO confirm).
	CONTROLLER_LOG_FILE_FORMATTER = "cedana-gpu-controller-%s.log"
	// Open flags for the log file: create it if missing, open write-only, and append to existing content.
	CONTROLLER_LOG_FILE_MODE      = os.O_CREATE | os.O_WRONLY | os.O_APPEND
	// Permission bits for the log file: read/write for the owner, read-only for group and others.
	CONTROLLER_LOG_FILE_PERMS     = 0o644
)
View Source
// Intervals and timeouts for the GPU manager.
// NOTE(review): the code that consumes these values is not visible here; the
// descriptions below are based on the names and should be confirmed.
const (
	// 10 seconds; presumably the period between DB synchronizations.
	DB_SYNC_INTERVAL       = 10 * time.Second
	// 1 second; presumably the delay before retrying a failed DB synchronization.
	DB_SYNC_RETRY_INTERVAL = 1 * time.Second

	// 20 seconds; presumably the limit for a GPU freeze operation.
	FREEZE_TIMEOUT   = 20 * time.Second
	// 20 seconds; presumably the limit for a GPU unfreeze operation.
	UNFREEZE_TIMEOUT = 20 * time.Second
	// 5 minutes; presumably the limit for a GPU dump operation.
	DUMP_TIMEOUT     = 5 * time.Minute
	// 5 minutes; presumably the limit for a GPU restore operation.
	RESTORE_TIMEOUT  = 5 * time.Minute
	// 30 seconds; presumably the limit for a health check.
	HEALTH_TIMEOUT   = 30 * time.Second
)

Variables

This section is empty.

Functions

func Attach

func Attach(gpus Manager) types.Adapter[types.Run]

Adapter that adds GPU support to the request.

func Dump added in v0.9.244

func Dump(gpus Manager) types.Adapter[types.Dump]

Adapter that adds GPU dump to the request.

func InheritFilesForRestore added in v0.9.244

func InheritFilesForRestore(next types.Restore) types.Restore

func Interception

func Interception(next types.Run) types.Run

Adapter that adds GPU interception to the request based on the job type. Each plugin must implement its own support for GPU interception.

func ProcessInterception

func ProcessInterception(next types.Run) types.Run

Adapter that adds GPU interception to a process job.

func Restore added in v0.9.244

func Restore(gpus Manager) types.Adapter[types.Restore]

Adapter that restores GPU support to the request.

Types

type Manager

// Manager attaches GPU controllers to processes and detaches them again,
// looks up the controller for a PID, and exposes health checks, a CRIU
// notify callback and a state synchronization hook.
type Manager interface {
	// Attach attaches a GPU controller to a process with the given PID.
	// Takes in a channel for the PID, allowing this to be called before the process is started,
	// so that the PID can be passed in later. Returns a unique ID for the GPU controller.
	// NOTE(review): the roles of user and env are not documented here — presumably the
	// credentials and extra environment variables for the controller; confirm.
	Attach(ctx context.Context, user *syscall.Credential, pid <-chan uint32, env ...string) (string, error)

	// IsAttached returns true if a GPU controller is attached to the process with the given PID.
	IsAttached(pid uint32) bool

	// Detach detaches the GPU controller from the process with the given PID.
	Detach(pid uint32) error

	// Checks returns server-compatible health checks.
	Checks() types.Checks

	// GetID returns the ID of the GPU controller for a given PID.
	GetID(pid uint32) (string, error)

	// CRIUCallback returns the CRIU notify callback for GPU checkpoint/restore.
	// NOTE(review): id is presumably the controller ID returned by Attach; the
	// meaning of stream is not visible here — confirm.
	CRIUCallback(id string, stream int32, env ...string) *criu.NotifyCallback

	// Sync is used to synchronize the manager with the current system state.
	Sync(ctx context.Context) error
}

type ManagerMissing

type ManagerMissing struct{}

Embed this into unimplemented implementations.

func (ManagerMissing) Attach

func (ManagerMissing) Attach(ctx context.Context, user *syscall.Credential, pid <-chan uint32, env ...string) (string, error)

func (ManagerMissing) CRIUCallback

func (ManagerMissing) CRIUCallback(id string, stream int32, env ...string) *criu.NotifyCallback

func (ManagerMissing) Checks

func (ManagerMissing) Checks() types.Checks

func (ManagerMissing) Detach

func (ManagerMissing) Detach(pid uint32) error

func (ManagerMissing) GetID added in v0.9.244

func (ManagerMissing) GetID(pid uint32) (string, error)

func (ManagerMissing) IsAttached

func (ManagerMissing) IsAttached(pid uint32) bool

func (ManagerMissing) Sync added in v0.9.244

func (ManagerMissing) Sync(ctx context.Context) error

type ManagerPool added in v0.9.244

type ManagerPool struct {
	// contains filtered or unexported fields
}

func NewPoolManager

func NewPoolManager(lifetime context.Context, serverWg *sync.WaitGroup, poolSize int, plugins plugins.Manager, db db.GPU) (*ManagerPool, error)

func (*ManagerPool) Attach added in v0.9.244

func (m *ManagerPool) Attach(ctx context.Context, user *syscall.Credential, pid <-chan uint32, env ...string) (id string,
	err error,
)

func (*ManagerPool) CRIUCallback added in v0.9.244

func (m *ManagerPool) CRIUCallback(id string, stream int32, env ...string) *criu_client.NotifyCallback

func (*ManagerPool) Checks added in v0.9.244

func (m *ManagerPool) Checks() types.Checks

func (*ManagerPool) Detach added in v0.9.244

func (m *ManagerPool) Detach(pid uint32) error

func (*ManagerPool) GetID added in v0.9.244

func (m *ManagerPool) GetID(pid uint32) (string, error)

func (*ManagerPool) IsAttached added in v0.9.244

func (m *ManagerPool) IsAttached(pid uint32) bool

func (*ManagerPool) Sync added in v0.9.244

func (m *ManagerPool) Sync(ctx context.Context) error

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL