Documentation
¶
Index ¶
- Variables
- type Client
- func NewNIMClient(opts NewCommonClientOptions, nconfig *config.NIMConfig, ...) Client
- func NewOllamaClient(opts NewCommonClientOptions, oconfig config.OllamaConfig) Client
- func NewSGLangClient(opts NewCommonClientOptions, modelClient modelClient) Client
- func NewTritonClient(opts NewCommonClientOptions) Client
- func NewVLLMClient(opts NewCommonClientOptions, modelClient modelClient, ...) Client
- type ClientFactory
- type DriftedPodUpdater
- type LoRARebalancer
- type LoRAReconciler
- type Manager
- func (m *Manager) BlacklistLLMAddress(modelID, address string) error
- func (m *Manager) DeleteModel(ctx context.Context, modelID string) error
- func (m *Manager) GetLLMAddress(modelID string) (string, error)
- func (m *Manager) GetUpdateInProgressPodNames() map[string]struct{}
- func (m *Manager) ListModels() []*iv1.EngineStatus_Model
- func (m *Manager) PullModel(ctx context.Context, modelID string) error
- func (m *Manager) PullModelUnblocked(ctx context.Context, modelID string) error
- func (m *Manager) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error)
- func (m *Manager) RunStateMachine(ctx context.Context) error
- func (m *Manager) SetupWithManager(mgr ctrl.Manager, leaderElection bool) error
- func (m *Manager) UpdateModel(ctx context.Context, modelID string) error
- type ModelActivator
- type ModelCache
- type NewCommonClientOptions
- type OllamaManager
- func (m *OllamaManager) BlacklistLLMAddress(modelID, address string) error
- func (m *OllamaManager) DeleteModel(ctx context.Context, modelID string) error
- func (m *OllamaManager) GetLLMAddress(_ string) (string, error)
- func (m *OllamaManager) GetUpdateInProgressPodNames() map[string]struct{}
- func (m *OllamaManager) ListModels() []*iv1.EngineStatus_Model
- func (m *OllamaManager) NeedLeaderElection() bool
- func (m *OllamaManager) PullModel(ctx context.Context, modelID string) error
- func (m *OllamaManager) PullModelUnblocked(ctx context.Context, modelID string) error
- func (m *OllamaManager) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error)
- func (m *OllamaManager) SetupWithManager(mgr ctrl.Manager, leaderElection bool) error
- func (m *OllamaManager) Start(ctx context.Context) error
- func (m *OllamaManager) UpdateModel(ctx context.Context, modelID string) error
- type PodMonitor
- type UpdateInProgressPodGetter
- type Updater
Constants ¶
This section is empty.
Variables ¶
var ErrRequestCanceled = errors.New("request is canceled")
ErrRequestCanceled is returned when the request is canceled.
Functions ¶
This section is empty.
Types ¶
type Client ¶
type Client interface {
GetName(modelID string) string
GetAddress(name string) string
DeployRuntime(ctx context.Context, model *mv1.Model, update bool) (*appsv1.StatefulSet, error)
DeleteRuntime(ctx context.Context, name, modelID string) error
RuntimeName() string
ModelConfigItem(model *mv1.Model) *config.ModelConfigItem
}
Client is the interface for managing runtimes.
func NewNIMClient ¶ added in v1.30.0
func NewNIMClient( opts NewCommonClientOptions, nconfig *config.NIMConfig, nmconfig *config.NIMModelConfig, ) Client
NewNIMClient creates a new NIM runtime client.
func NewOllamaClient ¶
func NewOllamaClient( opts NewCommonClientOptions, oconfig config.OllamaConfig, ) Client
NewOllamaClient creates a new Ollama runtime client.
func NewSGLangClient ¶ added in v1.45.0
func NewSGLangClient( opts NewCommonClientOptions, modelClient modelClient, ) Client
NewSGLangClient creates a new SGLang runtime client.
func NewTritonClient ¶ added in v0.378.0
func NewTritonClient(opts NewCommonClientOptions) Client
NewTritonClient creates a new Triton runtime client.
func NewVLLMClient ¶
func NewVLLMClient( opts NewCommonClientOptions, modelClient modelClient, vLLMConfg *config.VLLMConfig, ) Client
NewVLLMClient creates a new vLLM runtime client.
type ClientFactory ¶
ClientFactory is the interface for creating a new Client given a model ID.
type DriftedPodUpdater ¶ added in v1.37.0
type DriftedPodUpdater struct {
// contains filtered or unexported fields
}
DriftedPodUpdater updates runtimes at startup.
func NewDriftedPodUpdater ¶ added in v1.37.0
func NewDriftedPodUpdater( namespace string, k8sClient client.Client, updateInProgressPodGetter UpdateInProgressPodGetter, ) *DriftedPodUpdater
NewDriftedPodUpdater creates a new DriftedPodUpdater.
func (*DriftedPodUpdater) NeedLeaderElection ¶ added in v1.37.0
func (u *DriftedPodUpdater) NeedLeaderElection() bool
NeedLeaderElection implements LeaderElectionRunnable and always returns true.
func (*DriftedPodUpdater) Run ¶ added in v1.37.0
func (u *DriftedPodUpdater) Run(ctx context.Context) error
Run runs the updater.
func (*DriftedPodUpdater) SetupWithManager ¶ added in v1.37.0
func (u *DriftedPodUpdater) SetupWithManager(mgr ctrl.Manager) error
SetupWithManager sets up the updater with the manager.
type LoRARebalancer ¶ added in v1.28.0
type LoRARebalancer struct {
// contains filtered or unexported fields
}
LoRARebalancer is a controller that rebalances LoRA adapters across pods.
func NewLoRARebalancer ¶ added in v1.28.0
func NewLoRARebalancer( k8sClient k8sclient.Client, loraAdapterPullAndLoader loraAdapterPullAndLoader, ) *LoRARebalancer
NewLoRARebalancer creates a new LoRARebalancer.
func (*LoRARebalancer) NeedLeaderElection ¶ added in v1.28.0
func (r *LoRARebalancer) NeedLeaderElection() bool
NeedLeaderElection implements LeaderElectionRunnable and always returns true.
func (*LoRARebalancer) Run ¶ added in v1.28.0
Run periodically checks the status of the pods and loaded LoRA adapters.
func (*LoRARebalancer) SetupWithManager ¶ added in v1.28.0
func (r *LoRARebalancer) SetupWithManager(mgr ctrl.Manager) error
SetupWithManager sets up the LoRA rebalancer with the given controller manager.
type LoRAReconciler ¶ added in v1.23.0
type LoRAReconciler struct {
// contains filtered or unexported fields
}
LoRAReconciler reconciles the LoRA adapters loading status.
func NewLoRAReconciler ¶ added in v1.23.0
func NewLoRAReconciler( k8sClient k8sclient.Client, updateProcessor updateProcessor, ) *LoRAReconciler
NewLoRAReconciler creates a new LoRAReconciler.
func (*LoRAReconciler) Run ¶ added in v1.23.0
Run periodically checks the status of the pods and loaded LoRA adapters.
func (*LoRAReconciler) SetupWithManager ¶ added in v1.23.0
func (r *LoRAReconciler) SetupWithManager(mgr ctrl.Manager) error
SetupWithManager sets up the LoRA reconciler with the given controller manager.
type Manager ¶
type Manager struct {
// contains filtered or unexported fields
}
Manager manages runtimes.
func NewManager ¶
func NewManager( k8sClient client.Client, rtClientFactory ClientFactory, autoscaler autoscaler.Registerer, modelGetter modelGetter, podMonitor podMonitor, enableDynamicLoRALoading bool, pullerPort int, namespace string, nimModels map[string]bool, ) *Manager
NewManager creates a new runtime manager.
func (*Manager) BlacklistLLMAddress ¶ added in v1.29.0
BlacklistLLMAddress blacklists the address of the LLM.
func (*Manager) DeleteModel ¶ added in v1.21.0
DeleteModel deletes the model from the model manager.
func (*Manager) GetLLMAddress ¶
GetLLMAddress returns the address of the LLM.
func (*Manager) GetUpdateInProgressPodNames ¶ added in v1.37.0
GetUpdateInProgressPodNames returns the names of pods that are currently in the process of updating.
func (*Manager) ListModels ¶ added in v1.34.0
func (m *Manager) ListModels() []*iv1.EngineStatus_Model
ListModels returns the list of models.
func (*Manager) PullModelUnblocked ¶ added in v1.33.0
PullModelUnblocked pulls the model from the model manager without waiting for its completion.
func (*Manager) RunStateMachine ¶ added in v1.24.0
RunStateMachine runs the state machine for the manager.
func (*Manager) SetupWithManager ¶
SetupWithManager sets up the runtime manager with the given controller manager.
type ModelActivator ¶ added in v1.26.0
type ModelActivator struct {
// contains filtered or unexported fields
}
ModelActivator preloads models.
func NewModelActivator ¶ added in v1.26.0
func NewModelActivator( preloadedModelIDs []string, mmanager modelManager, modelLister modelLister, isDynamicLoRALoadingEnabled bool, ) *ModelActivator
NewModelActivator creates a new ModelActivator.
func (*ModelActivator) NeedLeaderElection ¶ added in v1.26.0
func (a *ModelActivator) NeedLeaderElection() bool
NeedLeaderElection implements LeaderElectionRunnable and always returns true.
func (*ModelActivator) SetupWithManager ¶ added in v1.26.0
func (a *ModelActivator) SetupWithManager(mgr ctrl.Manager) error
SetupWithManager sets up the model activator with the given controller manager.
type ModelCache ¶ added in v1.41.0
type ModelCache struct {
// contains filtered or unexported fields
}
ModelCache is a cache for models.
func NewModelCache ¶ added in v1.41.0
func NewModelCache(modelGetter modelGetter) *ModelCache
NewModelCache creates a new model cache.
func (*ModelCache) GetModel ¶ added in v1.41.0
func (c *ModelCache) GetModel(ctx context.Context, in *mv1.GetModelRequest, opts ...grpc.CallOption) (*mv1.Model, error)
GetModel gets the model.
type NewCommonClientOptions ¶ added in v1.41.0
type NewCommonClientOptions struct {
K8sClient client.Client
Namespace string
Owner *metav1apply.OwnerReferenceApplyConfiguration
Rconfig *config.RuntimeConfig
Mconfig *config.ProcessedModelConfig
EnableDriftedPodUpdate bool
EnableOverrideWithModelConfig bool
}
NewCommonClientOptions are options for creating a commonClient.
type OllamaManager ¶ added in v1.18.0
type OllamaManager struct {
// contains filtered or unexported fields
}
OllamaManager manages multiple models in a single ollama runtime.
func NewOllamaManager ¶ added in v1.18.0
func NewOllamaManager( k8sClient client.Client, ollamaClient Client, autoscaler autoscaler.Registerer, pullerAddr string, ) *OllamaManager
NewOllamaManager creates a new ollama runtime manager.
func (*OllamaManager) BlacklistLLMAddress ¶ added in v1.29.0
func (m *OllamaManager) BlacklistLLMAddress(modelID, address string) error
BlacklistLLMAddress blacklists the address of the LLM for the given model.
func (*OllamaManager) DeleteModel ¶ added in v1.21.0
func (m *OllamaManager) DeleteModel(ctx context.Context, modelID string) error
DeleteModel deletes the model from the model manager.
func (*OllamaManager) GetLLMAddress ¶ added in v1.18.0
func (m *OllamaManager) GetLLMAddress(_ string) (string, error)
GetLLMAddress returns the address of the LLM for the given model.
func (*OllamaManager) GetUpdateInProgressPodNames ¶ added in v1.37.0
func (m *OllamaManager) GetUpdateInProgressPodNames() map[string]struct{}
GetUpdateInProgressPodNames returns the names of pods that are currently in the process of updating.
func (*OllamaManager) ListModels ¶ added in v1.34.0
func (m *OllamaManager) ListModels() []*iv1.EngineStatus_Model
ListModels returns the list of models.
func (*OllamaManager) NeedLeaderElection ¶ added in v1.18.0
func (m *OllamaManager) NeedLeaderElection() bool
NeedLeaderElection implements LeaderElectionRunnable and always returns true.
func (*OllamaManager) PullModel ¶ added in v1.18.0
func (m *OllamaManager) PullModel(ctx context.Context, modelID string) error
PullModel pulls the model from the model manager.
func (*OllamaManager) PullModelUnblocked ¶ added in v1.33.0
func (m *OllamaManager) PullModelUnblocked(ctx context.Context, modelID string) error
PullModelUnblocked pulls the model from the model manager without waiting for its completion.
func (*OllamaManager) SetupWithManager ¶ added in v1.18.0
func (m *OllamaManager) SetupWithManager(mgr ctrl.Manager, leaderElection bool) error
SetupWithManager sets up the runtime manager with the given controller manager.
func (*OllamaManager) Start ¶ added in v1.18.0
func (m *OllamaManager) Start(ctx context.Context) error
Start deploys the ollama runtime.
func (*OllamaManager) UpdateModel ¶ added in v1.41.0
func (m *OllamaManager) UpdateModel(ctx context.Context, modelID string) error
UpdateModel updates the model in the model manager.
type PodMonitor ¶ added in v1.39.0
type PodMonitor struct {
// contains filtered or unexported fields
}
PodMonitor monitors the pods in the cluster.
func NewPodMonitor ¶ added in v1.39.0
func NewPodMonitor( k8sClient k8sclient.Client, clientset kubernetes.Interface, ) *PodMonitor
NewPodMonitor constructs a PodMonitor.
func (*PodMonitor) SetupWithManager ¶ added in v1.39.0
func (m *PodMonitor) SetupWithManager(mgr ctrl.Manager) error
SetupWithManager sets up the pod monitor with the given controller manager.
type UpdateInProgressPodGetter ¶ added in v1.37.0
type UpdateInProgressPodGetter interface {
GetUpdateInProgressPodNames() map[string]struct{}
}
UpdateInProgressPodGetter gets the names of pods that are currently being updated.
type Updater ¶
type Updater struct {
// contains filtered or unexported fields
}
Updater updates runtimes at startup.
func NewUpdater ¶
func NewUpdater( namespace string, rtClientFactory ClientFactory, modelGetter modelGetter, ) *Updater
NewUpdater creates a new Updater.
TODO(kenji): Consider removing updater as ModelActivator now takes the responsibility of updating the runtime.
func (*Updater) NeedLeaderElection ¶
NeedLeaderElection implements LeaderElectionRunnable and always returns true.
func (*Updater) SetupWithManager ¶
SetupWithManager sets up the updater with the manager.