worker

package
v1.5.0 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 13, 2026 License: Apache-2.0 Imports: 13 Imported by: 0

Documentation

Index

Constants

View Source
// DefaultWorkerPort is an untyped integer constant holding the default worker
// port number.
// NOTE(review): presumably the fallback for WorkerConfig.ListenPort — confirm.
const DefaultWorkerPort = 42352

DefaultWorkerPort is the default worker port.

Variables

This section is empty.

Functions

This section is empty.

Types

type Manager

type Manager struct {
	// contains filtered or unexported fields
}

Manager manages worker processes

func NewManager

func NewManager(stateDir, workerBinary string) *Manager

NewManager creates a new worker manager

func (*Manager) GetStatus

func (m *Manager) GetStatus(workerID string) (*WorkerState, error)

GetStatus returns the status of a worker

func (*Manager) List

func (m *Manager) List() []*WorkerState

List returns all worker states

func (*Manager) LoadStateFile

func (m *Manager) LoadStateFile() ([]WorkerConfig, error)

LoadStateFile loads worker state from a tensor-fusion format file

func (*Manager) Reconcile

func (m *Manager) Reconcile(desiredWorkers []WorkerConfig) error

Reconcile reconciles the desired worker state with actual state

func (*Manager) SaveStateFile

func (m *Manager) SaveStateFile() error

SaveStateFile saves worker state to a file in tensor-fusion format

func (*Manager) Shutdown

func (m *Manager) Shutdown()

Shutdown stops all workers and cleans up

func (*Manager) Start

func (m *Manager) Start(config WorkerConfig) error

Start starts a worker process

func (*Manager) Stop

func (m *Manager) Stop(workerID string) error

Stop stops a worker process

type TensorFusionWorkerInfo

// TensorFusionWorkerInfo is a JSON-serializable record describing one worker.
// Its JSON keys are the Go field names verbatim, unlike the snake_case keys
// used by WorkerConfig and WorkerState.
type TensorFusionWorkerInfo struct {
	// WorkerUID is always emitted (no omitempty).
	WorkerUID        string   `json:"WorkerUID"`
	// Namespace is dropped from the JSON output when empty.
	Namespace        string   `json:"Namespace,omitempty"`
	// WorkerName is dropped from the JSON output when empty.
	WorkerName       string   `json:"WorkerName,omitempty"`
	// AllocatedDevices has no omitempty, so a nil slice is encoded as null
	// rather than being left out.
	AllocatedDevices []string `json:"AllocatedDevices"`
	// Status is a plain string here, not a WorkerStatus. The values listed
	// match the strings of WorkerStatusPending, WorkerStatusRunning and
	// WorkerStatusTerminated.
	Status           string   `json:"Status"` // "Pending", "Running", "Terminated"
}

TensorFusionWorkerInfo represents the worker info format expected by tensor-fusion hypervisor

type WorkerConfig

// WorkerConfig is the JSON-serializable configuration of a single worker
// process. Fields tagged omitempty are left out of the JSON output when they
// hold their zero value; the others are always emitted.
type WorkerConfig struct {
	// WorkerID is the worker's identifier.
	// NOTE(review): presumably the same ID accepted by Manager.Stop and
	// Manager.GetStatus — confirm.
	WorkerID         string     `json:"worker_id"`
	// GPUIDs lists GPU identifiers as strings; a nil slice encodes as null.
	GPUIDs           []string   `json:"gpu_ids"`
	// ListenPort is always emitted, even when zero.
	// NOTE(review): presumably DefaultWorkerPort applies when unset — confirm.
	ListenPort       int        `json:"listen_port"`
	// Mode is a WorkerMode; the declared values are WorkerModeTCP and
	// WorkerModeShmem.
	Mode             WorkerMode `json:"mode"`
	// ShmemFile is omitted when empty.
	// NOTE(review): presumably only meaningful when Mode is WorkerModeShmem —
	// confirm.
	ShmemFile        string     `json:"shmem_file,omitempty"`
	// ShmemSizeMB is omitted when zero; the unit is MB per the field name.
	ShmemSizeMB      int        `json:"shmem_size_mb,omitempty"`
	// Enabled has no omitempty, so false is written explicitly.
	Enabled          bool       `json:"enabled"`
	// WorkerBinaryPath is omitted when empty.
	// NOTE(review): presumably overrides the workerBinary given to NewManager
	// — confirm.
	WorkerBinaryPath string     `json:"worker_binary_path,omitempty"`
}

WorkerConfig represents configuration for a worker process

type WorkerMode

type WorkerMode string

WorkerMode represents the worker network mode

// Declared WorkerMode values. The string on the right is what appears under
// the "mode" key when a WorkerConfig is encoded as JSON.
const (
	// WorkerModeTCP is encoded as "tcp".
	WorkerModeTCP   WorkerMode = "tcp"
	// WorkerModeShmem is encoded as "shmem".
	// NOTE(review): presumably the shared-memory mode that WorkerConfig's
	// ShmemFile and ShmemSizeMB fields configure — confirm.
	WorkerModeShmem WorkerMode = "shmem"
)

type WorkerState

// WorkerState pairs a worker's configuration with its runtime status and is
// JSON-serializable. Fields tagged omitempty are left out of the JSON output
// when unset.
type WorkerState struct {
	// Config is stored by value and nested under the "config" key.
	Config    WorkerConfig `json:"config"`
	// Status is one of the WorkerStatus string values.
	Status    WorkerStatus `json:"status"`
	// PID is omitted from JSON when zero.
	PID       int          `json:"pid,omitempty"`
	// StartedAt is a pointer so that a nil value is omitted from JSON.
	StartedAt *time.Time   `json:"started_at,omitempty"`
	// Error is a free-form message, omitted from JSON when empty.
	Error     string       `json:"error,omitempty"`
}

WorkerState represents the runtime state of a worker

type WorkerStatus

type WorkerStatus string

WorkerStatus represents the current status of a worker

// Declared WorkerStatus values. The strings are capitalized; "Pending",
// "Running" and "Terminated" are also the values listed in the comment on
// TensorFusionWorkerInfo.Status.
// NOTE(review): the allowed transitions between these states, and the
// difference between Stopped and Terminated, are not visible here — confirm
// against the Manager implementation.
const (
	WorkerStatusPending    WorkerStatus = "Pending"
	WorkerStatusRunning    WorkerStatus = "Running"
	WorkerStatusStopping   WorkerStatus = "Stopping"
	WorkerStatusStopped    WorkerStatus = "Stopped"
	WorkerStatusTerminated WorkerStatus = "Terminated"
	WorkerStatusError      WorkerStatus = "Error"
)

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL