worker

package
v0.1.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 29, 2023 License: Apache-2.0 Imports: 18 Imported by: 0

Documentation

Index

Constants

View Source
const (
	// WokerCommonSuffix is the "-worker" name suffix.
	// NOTE(review): "Woker" looks like a misspelling of "Worker"; the identifier
	// is exported, so renaming it would break callers. Presumably this is the
	// suffix used by PodWorker.SuffixedName — confirm.
	WokerCommonSuffix = "-worker"

	// RDMANodeLabel is the label key "arcadia.kubeagi.k8s.com.cn/rdma".
	// NOTE(review): presumably applied to nodes that support RDMA — confirm.
	RDMANodeLabel = "arcadia.kubeagi.k8s.com.cn/rdma"
)
View Source
const (
	// ResourceNvidiaGPU is the corev1.ResourceName for NVIDIA GPUs ("nvidia.com/gpu").
	ResourceNvidiaGPU corev1.ResourceName = "nvidia.com/gpu"
)

Variables

View Source
var (
	// ErrNotImplementedYet is the sentinel error for functionality that is not implemented yet.
	ErrNotImplementedYet = errors.New("not implemented yet")
	// ErrModelNotReady is the sentinel error reporting that the worker's model is not ready.
	ErrModelNotReady     = errors.New("worker's model is not ready")

	// DefaultWorkerReplicas is the default number of replicas for a worker.
	// Only 1 is supported for now.
	DefaultWorkerReplicas int32 = 1
)

Functions

func MakeMergedDeployment

func MakeMergedDeployment(target *appsv1.Deployment, desired *appsv1.Deployment) *appsv1.Deployment

func NumberOfGPUs

func NumberOfGPUs(resource corev1.ResourceList) string

NumberOfGPUs from ResourceList

Types

type Action

// Action is a string-typed value; ActionOnError returns one for a given error.
// NOTE(review): presumably it names the step a caller should take in response
// to that error — confirm against ActionOnError's implementation.
type Action string
const (
	// Create is the "create" action.
	Create Action = "create"
	// Update is the "update" action.
	Update Action = "update"
	// Panic is the "panic" action.
	Panic  Action = "panic"
)

func ActionOnError

func ActionOnError(err error) Action

type Device

type Device string

Device defines the types of device (such as cpu, gpu, xpu, npu) on which the model runs

const (
	// CPU is the "cpu" device.
	CPU  Device = "cpu"
	// CUDA is the "cuda" device.
	CUDA Device = "cuda"
	// XPU is the "xpu" device. Not supported yet.
	XPU Device = "xpu"
	// NPU is the "npu" device. Not supported yet.
	NPU Device = "npu"
)

func DeviceBasedOnResource

func DeviceBasedOnResource(resource corev1.ResourceList) Device

DeviceBasedOnResource returns the device type based on the resource list

func (Device) String

func (device Device) String() string

type LoaderGit

type LoaderGit struct{}

LoaderGit defines the way to load a model from git

func (*LoaderGit) Build

func (loader *LoaderGit) Build(ctx context.Context, model *arcadiav1alpha1.TypedObjectReference) (any, error)

type LoaderOSS

type LoaderOSS struct {
	// contains filtered or unexported fields
}

LoaderOSS defines the way to load a model from OSS

func (*LoaderOSS) Build

func (loader *LoaderOSS) Build(ctx context.Context, model *arcadiav1alpha1.TypedObjectReference) (any, error)

Build loads nothing inside the Go code

type ModelLoader

type ModelLoader interface {
	// Build takes a context and a reference to the model and returns an
	// untyped result together with an error.
	// NOTE(review): the concrete type behind `any` is not visible here —
	// confirm per implementation.
	Build(ctx context.Context, model *arcadiav1alpha1.TypedObjectReference) (any, error)
}

ModelLoader loads models for a worker

func NewLoaderOSS

func NewLoaderOSS(ctx context.Context, c client.Client, endpoint *arcadiav1alpha1.Endpoint) (ModelLoader, error)

type ModelRunner

type ModelRunner interface {
	// Device returns the device used when running the model.
	Device() Device
	// Build builds a model runner instance for the referenced model.
	// NOTE(review): the concrete type behind `any` is not visible here —
	// confirm per implementation.
	Build(ctx context.Context, model *arcadiav1alpha1.TypedObjectReference) (any, error)
}

ModelRunner runs a model service

func NewRunnerFastchat

func NewRunnerFastchat(c client.Client, w *arcadiav1alpha1.Worker) (ModelRunner, error)

func NewRunnerFastchatVLLM

func NewRunnerFastchatVLLM(c client.Client, w *arcadiav1alpha1.Worker) (ModelRunner, error)

type PodWorker

type PodWorker struct {

	// NamespacedName is embedded and holds the worker's namespaced name.
	types.NamespacedName
	// contains filtered or unexported fields
}

PodWorker hosts this worker in a single pod, with the loader and runner chosen based on the Worker's configuration

func (*PodWorker) AfterStart

func (podWorker *PodWorker) AfterStart(ctx context.Context) error

Actions to do after starting this worker

func (*PodWorker) BeforeStart

func (podWorker *PodWorker) BeforeStart(ctx context.Context) error

BeforeStart will create the resources related to this Worker. Currently these are a PVC (if configured), a Service, an LLM (if the model is an LLM), and an Embedder (if the model is an embedding model).

func (*PodWorker) BeforeStop

func (podWorker *PodWorker) BeforeStop(ctx context.Context) error

TODO: BeforeStop

func (*PodWorker) Model

func (podWorker *PodWorker) Model() *arcadiav1alpha1.Model

Model that this worker is running for

func (*PodWorker) Start

func (podWorker *PodWorker) Start(ctx context.Context) error

Start will build and create worker pod which will host model service

func (*PodWorker) State

func (podWorker *PodWorker) State(ctx context.Context) (any, error)

State of this worker

func (*PodWorker) Stop

func (podWorker *PodWorker) Stop(ctx context.Context) error

TODO: Stop

func (*PodWorker) SuffixedName

func (podWorker *PodWorker) SuffixedName() string

func (*PodWorker) Worker

func (podWorker *PodWorker) Worker() *arcadiav1alpha1.Worker

type RDMALoader

type RDMALoader struct {
	// contains filtered or unexported fields
}

RDMALoader adds support for RDMA. It allows a Pod to use hostPath and RDMA to pull models faster and start services

func NewRDMALoader

func NewRDMALoader(c client.Client, modelName, workerUID string, source *arcadiav1alpha1.Datasource) *RDMALoader

func (*RDMALoader) Build

type RunnerFastchat

type RunnerFastchat struct {
	// contains filtered or unexported fields
}

RunnerFastchat uses fastchat to run a model

func (*RunnerFastchat) Build

Build a runner instance

func (*RunnerFastchat) Device

func (runner *RunnerFastchat) Device() Device

func (*RunnerFastchat) NumberOfGPUs

func (runner *RunnerFastchat) NumberOfGPUs() string

NumberOfGPUs utilized by this runner

type RunnerFastchatVLLM

type RunnerFastchatVLLM struct {
	// contains filtered or unexported fields
}

RunnerFastchatVLLM uses fastchat with vllm to run a model

func (*RunnerFastchatVLLM) Build

Build a runner instance

func (*RunnerFastchatVLLM) Device

func (runner *RunnerFastchatVLLM) Device() Device

Device used by this runner

func (*RunnerFastchatVLLM) NumberOfGPUs

func (runner *RunnerFastchatVLLM) NumberOfGPUs() string

NumberOfGPUs utilized by this runner

type Worker

type Worker interface {
	// Worker returns the Worker resource that this instance is for.
	Worker() *arcadiav1alpha1.Worker
	// Model returns the Model that this worker is running for.
	Model() *arcadiav1alpha1.Model

	// BeforeStart performs the actions to do before starting this worker.
	BeforeStart(ctx context.Context) error
	// Start performs the actions to do when starting this worker.
	Start(ctx context.Context) error
	// AfterStart performs the actions to do after starting this worker.
	AfterStart(ctx context.Context) error

	// BeforeStop performs the actions to do before stopping this worker.
	BeforeStop(ctx context.Context) error
	// Stop performs the actions to do when stopping this worker.
	Stop(ctx context.Context) error

	// State reports the state of this worker.
	// NOTE(review): the concrete type behind `any` is not visible here —
	// confirm per implementation.
	State(context.Context) (any, error)
}

Worker implements the lifecycle management of an LLM worker

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL