Documentation ¶
Index ¶
- Constants
- Variables
- func MakeMergedDeployment(target *appsv1.Deployment, desired *appsv1.Deployment) *appsv1.Deployment
- func NumberOfGPUs(resource corev1.ResourceList) string
- type Action
- type Device
- type LoaderGit
- type LoaderOSS
- type ModelLoader
- type ModelRunner
- type PodWorker
- func (podWorker *PodWorker) AfterStart(ctx context.Context) error
- func (podWorker *PodWorker) BeforeStart(ctx context.Context) error
- func (podWorker *PodWorker) BeforeStop(ctx context.Context) error
- func (podWorker *PodWorker) Model() *arcadiav1alpha1.Model
- func (podWorker *PodWorker) Start(ctx context.Context) error
- func (podWorker *PodWorker) State(ctx context.Context) (any, error)
- func (podWorker *PodWorker) Stop(ctx context.Context) error
- func (podWorker *PodWorker) SuffixedName() string
- func (podWorker *PodWorker) Worker() *arcadiav1alpha1.Worker
- type RDMALoader
- type RunnerFastchat
- type RunnerFastchatVLLM
- type Worker
Constants ¶
const (
    WokerCommonSuffix = "-worker"
    RDMANodeLabel     = "arcadia.kubeagi.k8s.com.cn/rdma"
)
const (
    // Resource
    ResourceNvidiaGPU corev1.ResourceName = "nvidia.com/gpu"
)
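A minimal sketch of how these constants might be used when assembling a worker Pod spec. The helper name, the node-selector value "true", and the container details are assumptions rather than code from this package; the snippet assumes it sits alongside this package and uses the standard imports corev1 "k8s.io/api/core/v1" and "k8s.io/apimachinery/pkg/api/resource".

// workerPodSpecSketch is a hypothetical helper, illustrative only: it requests one
// NVIDIA GPU and pins the Pod to RDMA-capable nodes via the label constant above.
func workerPodSpecSketch() corev1.PodSpec {
    return corev1.PodSpec{
        // The value expected for the RDMA node label is an assumption.
        NodeSelector: map[string]string{RDMANodeLabel: "true"},
        Containers: []corev1.Container{{
            Name: "runner",
            Resources: corev1.ResourceRequirements{
                Limits: corev1.ResourceList{
                    ResourceNvidiaGPU: resource.MustParse("1"),
                },
            },
        }},
    }
}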
Variables ¶
Functions ¶
func MakeMergedDeployment ¶
func MakeMergedDeployment(target *appsv1.Deployment, desired *appsv1.Deployment) *appsv1.Deployment
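The function carries no doc comment here, so the following is only a hedged usage sketch. That the desired Deployment's fields are layered on top of the existing target before updating is an assumption about the merge semantics; reconcileDeployment is a hypothetical name, and the snippet assumes imports of context, appsv1 "k8s.io/api/apps/v1", and client "sigs.k8s.io/controller-runtime/pkg/client".

// reconcileDeployment is a hypothetical helper: it merges the desired Deployment
// into the one currently in the cluster and pushes the result back.
func reconcileDeployment(ctx context.Context, c client.Client, current, desired *appsv1.Deployment) error {
    merged := MakeMergedDeployment(current, desired)
    return c.Update(ctx, merged)
}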
func NumberOfGPUs ¶
func NumberOfGPUs(resource corev1.ResourceList) string
NumberOfGPUs returns the number of GPUs in the given ResourceList as a string
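A small sketch of the call shape; that the result is "2" for this list follows from the doc comment and is an assumption, and the helper name is hypothetical (imports: fmt plus the corev1 and resource packages used above).

// gpuCountSketch is a hypothetical helper showing how NumberOfGPUs might be called.
func gpuCountSketch() {
    limits := corev1.ResourceList{
        ResourceNvidiaGPU: resource.MustParse("2"),
    }
    // Expected to print "2" for this list (assumption).
    fmt.Println(NumberOfGPUs(limits))
}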
Types ¶
type Device ¶
type Device string
Device defines the type of device (cpu, gpu, xpu, npu) that runs the model
func DeviceBasedOnResource ¶
func DeviceBasedOnResource(resource corev1.ResourceList) Device
DeviceBasedOnResource returns the device type based on the resource list
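A sketch pairing DeviceBasedOnResource with NumberOfGPUs when inspecting a worker's resource limits. That a nvidia.com/gpu entry yields a GPU device is an assumption, and describeResources is a hypothetical name.

// describeResources is a hypothetical helper that reports the device class and
// GPU count implied by a worker's resource limits.
func describeResources(limits corev1.ResourceList) {
    device := DeviceBasedOnResource(limits)
    fmt.Printf("device=%s gpus=%s\n", device, NumberOfGPUs(limits))
}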
type LoaderGit ¶
type LoaderGit struct{}
LoaderGit defines the way to load a model from git
func (*LoaderGit) Build ¶
func (loader *LoaderGit) Build(ctx context.Context, model *arcadiav1alpha1.TypedObjectReference) (any, error)
type LoaderOSS ¶
type LoaderOSS struct {
    // contains filtered or unexported fields
}
LoaderOSS defines the way to load a model from OSS
func (*LoaderOSS) Build ¶
func (loader *LoaderOSS) Build(ctx context.Context, model *arcadiav1alpha1.TypedObjectReference) (any, error)
Build does not load anything within Go code itself
type ModelLoader ¶
type ModelLoader interface {
    Build(ctx context.Context, model *arcadiav1alpha1.TypedObjectReference) (any, error)
}
ModelLoader loads models for a worker
func NewLoaderOSS ¶
func NewLoaderOSS(ctx context.Context, c client.Client, endpoint *arcadiav1alpha1.Endpoint) (ModelLoader, error)
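A hedged sketch of selecting a loader: LoaderGit can be constructed directly, while NewLoaderOSS needs a client and the OSS endpoint (presumably taken from the worker's Datasource). The helper name and the fromGit switch are hypothetical; in the real package the choice is presumably driven by the model's source.

// buildModelLoader is a hypothetical helper choosing a ModelLoader implementation
// and invoking its Build step for the referenced model.
func buildModelLoader(ctx context.Context, c client.Client, endpoint *arcadiav1alpha1.Endpoint, model *arcadiav1alpha1.TypedObjectReference, fromGit bool) (any, error) {
    var (
        loader ModelLoader
        err    error
    )
    if fromGit {
        loader = &LoaderGit{}
    } else {
        loader, err = NewLoaderOSS(ctx, c, endpoint)
        if err != nil {
            return nil, err
        }
    }
    // Build returns an implementation-specific artifact, hence the `any` type.
    return loader.Build(ctx, model)
}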
type ModelRunner ¶
type ModelRunner interface {
    // Device used when running the model
    Device() Device
    // Build a model runner instance
    Build(ctx context.Context, model *arcadiav1alpha1.TypedObjectReference) (any, error)
}
ModelRunner runs a model service
func NewRunnerFastchat ¶
func NewRunnerFastchat(c client.Client, w *arcadiav1alpha1.Worker) (ModelRunner, error)
func NewRunnerFastchatVLLM ¶
func NewRunnerFastchatVLLM(c client.Client, w *arcadiav1alpha1.Worker) (ModelRunner, error)
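A sketch of constructing a runner in the same spirit. Whether to use the plain FastChat runner or the vLLM-backed one is decided by the caller here via a hypothetical flag; in the real package this choice is presumably derived from the Worker's spec.

// buildModelRunner is a hypothetical helper constructing a ModelRunner and building it.
func buildModelRunner(ctx context.Context, c client.Client, w *arcadiav1alpha1.Worker, model *arcadiav1alpha1.TypedObjectReference, useVLLM bool) (any, error) {
    var (
        runner ModelRunner
        err    error
    )
    if useVLLM {
        runner, err = NewRunnerFastchatVLLM(c, w)
    } else {
        runner, err = NewRunnerFastchat(c, w)
    }
    if err != nil {
        return nil, err
    }
    // Device reports which device class (cpu, gpu, ...) the runner targets.
    fmt.Println("runner device:", runner.Device())
    return runner.Build(ctx, model)
}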
type PodWorker ¶
type PodWorker struct {
    // worker's namespacedname
    types.NamespacedName
    // contains filtered or unexported fields
}
PodWorker hosts this worker in a single pod, with the loader and runner chosen based on the Worker's configuration
func NewPodWorker ¶
func NewPodWorker(ctx context.Context, c client.Client, s *runtime.Scheme, w *arcadiav1alpha1.Worker, d *arcadiav1alpha1.Datasource) (*PodWorker, error)
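A construction sketch. That *PodWorker satisfies the Worker interface follows from the method set listed in the index; the helper name is hypothetical, and the runtime import is "k8s.io/apimachinery/pkg/runtime".

// newPodBackedWorker is a hypothetical helper: it wraps NewPodWorker and hands the
// result back as the Worker interface so callers only deal with the lifecycle methods.
func newPodBackedWorker(ctx context.Context, c client.Client, scheme *runtime.Scheme, w *arcadiav1alpha1.Worker, d *arcadiav1alpha1.Datasource) (Worker, error) {
    pw, err := NewPodWorker(ctx, c, scheme, w, d)
    if err != nil {
        return nil, err
    }
    return pw, nil
}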
func (*PodWorker) AfterStart ¶
func (podWorker *PodWorker) AfterStart(ctx context.Context) error
Actions to do after starting this worker
func (*PodWorker) BeforeStart ¶
func (podWorker *PodWorker) BeforeStart(ctx context.Context) error
BeforeStart creates the resources related to this Worker. Currently these are a PVC (if configured), a Service, an LLM (if it runs an llm model), and an Embedder (if it runs an embedding model).
func (*PodWorker) BeforeStop ¶
func (podWorker *PodWorker) BeforeStop(ctx context.Context) error
TODO: BeforeStop
func (*PodWorker) Model ¶
func (podWorker *PodWorker) Model() *arcadiav1alpha1.Model
Model that this worker is running for
func (*PodWorker) Start ¶
func (podWorker *PodWorker) Start(ctx context.Context) error
func (*PodWorker) State ¶
func (podWorker *PodWorker) State(ctx context.Context) (any, error)
func (*PodWorker) Stop ¶
func (podWorker *PodWorker) Stop(ctx context.Context) error
func (*PodWorker) SuffixedName ¶
func (podWorker *PodWorker) SuffixedName() string
func (*PodWorker) Worker ¶
func (podWorker *PodWorker) Worker() *arcadiav1alpha1.Worker
type RDMALoader ¶
type RDMALoader struct {
    // contains filtered or unexported fields
}
RDMALoader supports RDMA. It allows the Pod to use hostPath and RDMA to pull models faster and start services sooner
func NewRDMALoader ¶
func NewRDMALoader(c client.Client, modelName, workerUID string, source *arcadiav1alpha1.Datasource) *RDMALoader
func (*RDMALoader) Build ¶
func (r *RDMALoader) Build(ctx context.Context, _ *arcadiav1alpha1.TypedObjectReference) (any, error)
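A sketch of the RDMA-backed loading path. Passing nil for the model reference relies on that parameter being the blank identifier in Build's signature; the helper name is hypothetical.

// buildRDMALoader is a hypothetical helper for the RDMA-backed loading path.
func buildRDMALoader(ctx context.Context, c client.Client, modelName, workerUID string, source *arcadiav1alpha1.Datasource) (any, error) {
    loader := NewRDMALoader(c, modelName, workerUID, source)
    // The model reference is ignored by this implementation (it is "_" in the signature).
    return loader.Build(ctx, nil)
}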
type RunnerFastchat ¶
type RunnerFastchat struct {
    // contains filtered or unexported fields
}
RunnerFastchat uses FastChat to run a model
func (*RunnerFastchat) Build ¶
func (runner *RunnerFastchat) Build(ctx context.Context, model *arcadiav1alpha1.TypedObjectReference) (any, error)
Build a runner instance
func (*RunnerFastchat) Device ¶
func (runner *RunnerFastchat) Device() Device
func (*RunnerFastchat) NumberOfGPUs ¶
func (runner *RunnerFastchat) NumberOfGPUs() string
NumberOfGPUs utilized by this runner
type RunnerFastchatVLLM ¶
type RunnerFastchatVLLM struct {
    // contains filtered or unexported fields
}
RunnerFastchatVLLM uses FastChat with vLLM to run a model
func (*RunnerFastchatVLLM) Build ¶
func (runner *RunnerFastchatVLLM) Build(ctx context.Context, model *arcadiav1alpha1.TypedObjectReference) (any, error)
Build a runner instance
func (*RunnerFastchatVLLM) Device ¶
func (runner *RunnerFastchatVLLM) Device() Device
Device used by this runner
func (*RunnerFastchatVLLM) NumberOfGPUs ¶
func (runner *RunnerFastchatVLLM) NumberOfGPUs() string
NumberOfGPUs utilized by this runner
type Worker ¶
type Worker interface {
    // Worker that this is for
    Worker() *arcadiav1alpha1.Worker
    // Model that this worker is running for
    Model() *arcadiav1alpha1.Model
    // Actions to do before starting this worker
    BeforeStart(ctx context.Context) error
    // Actions to do when starting this worker
    Start(ctx context.Context) error
    // Actions to do after starting this worker
    AfterStart(ctx context.Context) error
    // Actions to do before stopping this worker
    BeforeStop(ctx context.Context) error
    // Actions to do when stopping this worker
    Stop(ctx context.Context) error
    // State of this worker
    State(context.Context) (any, error)
}
Worker implements the lifecycle management of an LLM worker
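A sketch of driving the start-up half of the lifecycle in the documented order; error handling is kept minimal, startWorker is a hypothetical name, and stopping would mirror it with BeforeStop followed by Stop.

// startWorker is a hypothetical helper that walks a Worker through its start-up phase.
func startWorker(ctx context.Context, w Worker) error {
    if err := w.BeforeStart(ctx); err != nil {
        return err
    }
    if err := w.Start(ctx); err != nil {
        return err
    }
    if err := w.AfterStart(ctx); err != nil {
        return err
    }
    // State reports the worker's current condition; its concrete type is
    // implementation-specific, hence the `any` return.
    state, err := w.State(ctx)
    if err != nil {
        return err
    }
    fmt.Println("worker state:", state)
    return nil
}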