runtime

package
v1.46.0 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 27, 2026 License: Apache-2.0 Imports: 47 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
var ErrRequestCanceled = errors.New("request is canceled")

ErrRequestCanceled is returned when the request is canceled.

Functions

This section is empty.

Types

type Client

type Client interface {
	GetName(modelID string) string
	GetAddress(name string) string
	DeployRuntime(ctx context.Context, model *mv1.Model, update bool) (*appsv1.StatefulSet, error)
	DeleteRuntime(ctx context.Context, name, modelID string) error

	RuntimeName() string
	ModelConfigItem(model *mv1.Model) *config.ModelConfigItem
}

Client is the interface for managing runtimes.

func NewNIMClient added in v1.30.0

func NewNIMClient(
	opts NewCommonClientOptions,
	nconfig *config.NIMConfig,
	nmconfig *config.NIMModelConfig,
) Client

NewNIMClient creates a new NIM runtime client.

func NewOllamaClient

func NewOllamaClient(
	opts NewCommonClientOptions,
	oconfig config.OllamaConfig,
) Client

NewOllamaClient creates a new Ollama runtime client.

func NewSGLangClient added in v1.45.0

func NewSGLangClient(
	opts NewCommonClientOptions,
	modelClient modelClient,
) Client

NewSGLangClient creates a new SGLang runtime client.

func NewTritonClient added in v0.378.0

func NewTritonClient(opts NewCommonClientOptions) Client

NewTritonClient creates a new Triton runtime client.

func NewVLLMClient

func NewVLLMClient(
	opts NewCommonClientOptions,
	modelClient modelClient,
	vLLMConfg *config.VLLMConfig,
) Client

NewVLLMClient creates a new VLLM runtime client.

type ClientFactory

type ClientFactory interface {
	New(modelID string) (Client, error)
}

ClientFactory is the interface for creating a new Client given a model ID.

type DriftedPodUpdater added in v1.37.0

type DriftedPodUpdater struct {
	// contains filtered or unexported fields
}

DriftedPodUpdater updates runtimes at startup.

func NewDriftedPodUpdater added in v1.37.0

func NewDriftedPodUpdater(
	namespace string,
	k8sClient client.Client,
	updateInProgressPodGetter UpdateInProgressPodGetter,
) *DriftedPodUpdater

NewDriftedPodUpdater creates a new DriftedPodUpdater.

func (*DriftedPodUpdater) NeedLeaderElection added in v1.37.0

func (u *DriftedPodUpdater) NeedLeaderElection() bool

NeedLeaderElection implements LeaderElectionRunnable and always returns true.

func (*DriftedPodUpdater) Reconcile added in v1.37.0

func (u *DriftedPodUpdater) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error)

Reconcile reconciles the runtime.

func (*DriftedPodUpdater) Run added in v1.37.0

func (u *DriftedPodUpdater) Run(ctx context.Context) error

Run runs the updater.

func (*DriftedPodUpdater) SetupWithManager added in v1.37.0

func (u *DriftedPodUpdater) SetupWithManager(mgr ctrl.Manager) error

SetupWithManager sets up the updater with the manager.

type LoRARebalancer added in v1.28.0

type LoRARebalancer struct {
	// contains filtered or unexported fields
}

LoRARebalancer is a controller that rebalances LoRA adapters across pods.

func NewLoRARebalancer added in v1.28.0

func NewLoRARebalancer(
	k8sClient k8sclient.Client,
	loraAdapterPullAndLoader loraAdapterPullAndLoader,
) *LoRARebalancer

NewLoRARebalancer creates a new LoRARebalancer.

func (*LoRARebalancer) NeedLeaderElection added in v1.28.0

func (r *LoRARebalancer) NeedLeaderElection() bool

NeedLeaderElection implements LeaderElectionRunnable and always returns true.

func (*LoRARebalancer) Reconcile added in v1.28.0

func (r *LoRARebalancer) Reconcile(
	ctx context.Context,
	req ctrl.Request,
) (ctrl.Result, error)

Reconcile updates the pods in the cluster.

func (*LoRARebalancer) Run added in v1.28.0

func (r *LoRARebalancer) Run(ctx context.Context, interval time.Duration) error

Run periodically checks the status of the pods and loaded LoRA adapters.

func (*LoRARebalancer) SetupWithManager added in v1.28.0

func (r *LoRARebalancer) SetupWithManager(mgr ctrl.Manager) error

SetupWithManager sets up the LoRA rebalancer with the given controller manager.

type LoRAReconciler added in v1.23.0

type LoRAReconciler struct {
	// contains filtered or unexported fields
}

LoRAReconciler reconciles the LoRA adapters loading status.

func NewLoRAReconciler added in v1.23.0

func NewLoRAReconciler(
	k8sClient k8sclient.Client,
	updateProcessor updateProcessor,
) *LoRAReconciler

NewLoRAReconciler creates a new LoRAReconciler.

func (*LoRAReconciler) Reconcile added in v1.23.0

func (r *LoRAReconciler) Reconcile(
	ctx context.Context,
	req ctrl.Request,
) (ctrl.Result, error)

Reconcile updates the pods in the cluster.

func (*LoRAReconciler) Run added in v1.23.0

func (r *LoRAReconciler) Run(ctx context.Context, interval time.Duration) error

Run periodically checks the status of the pods and loaded LoRA adapters.

func (*LoRAReconciler) SetupWithManager added in v1.23.0

func (r *LoRAReconciler) SetupWithManager(mgr ctrl.Manager) error

SetupWithManager sets up the LoRA reconciler with the given controller manager.

type Manager

type Manager struct {
	// contains filtered or unexported fields
}

Manager manages runtimes.

func NewManager

func NewManager(
	k8sClient client.Client,
	rtClientFactory ClientFactory,
	autoscaler autoscaler.Registerer,
	modelGetter modelGetter,
	podMonitor podMonitor,
	enableDynamicLoRALoading bool,
	pullerPort int,
	namespace string,
	nimModels map[string]bool,
) *Manager

NewManager creates a new runtime manager.

func (*Manager) BlacklistLLMAddress added in v1.29.0

func (m *Manager) BlacklistLLMAddress(modelID, address string) error

BlacklistLLMAddress blacklists the address of the LLM.

func (*Manager) DeleteModel added in v1.21.0

func (m *Manager) DeleteModel(ctx context.Context, modelID string) error

DeleteModel deletes the model from the model manager.

func (*Manager) GetLLMAddress

func (m *Manager) GetLLMAddress(modelID string) (string, error)

GetLLMAddress returns the address of the LLM.

func (*Manager) GetUpdateInProgressPodNames added in v1.37.0

func (m *Manager) GetUpdateInProgressPodNames() map[string]struct{}

GetUpdateInProgressPodNames returns the names of pods that are currently in the process of updating.

func (*Manager) ListModels added in v1.34.0

func (m *Manager) ListModels() []*iv1.EngineStatus_Model

ListModels returns the list of models.

func (*Manager) PullModel

func (m *Manager) PullModel(ctx context.Context, modelID string) error

PullModel pulls the model from the model manager.

func (*Manager) PullModelUnblocked added in v1.33.0

func (m *Manager) PullModelUnblocked(ctx context.Context, modelID string) error

PullModelUnblocked pulls the model from the model manager without waiting for its completion.

func (*Manager) Reconcile

func (m *Manager) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error)

Reconcile reconciles the runtime.

func (*Manager) RunStateMachine added in v1.24.0

func (m *Manager) RunStateMachine(ctx context.Context) error

RunStateMachine runs the state machine for the manager.

func (*Manager) SetupWithManager

func (m *Manager) SetupWithManager(mgr ctrl.Manager, leaderElection bool) error

SetupWithManager sets up the runtime manager with the given controller manager.

func (*Manager) UpdateModel added in v1.41.0

func (m *Manager) UpdateModel(ctx context.Context, modelID string) error

UpdateModel updates the model if its model config has changed.

type ModelActivator added in v1.26.0

type ModelActivator struct {
	// contains filtered or unexported fields
}

ModelActivator preloads models.

func NewModelActivator added in v1.26.0

func NewModelActivator(
	preloadedModelIDs []string,
	mmanager modelManager,
	modelLister modelLister,
	isDynamicLoRALoadingEnabled bool,
) *ModelActivator

NewModelActivator creates a new ModelActivator.

func (*ModelActivator) NeedLeaderElection added in v1.26.0

func (a *ModelActivator) NeedLeaderElection() bool

NeedLeaderElection implements LeaderElectionRunnable and always returns true.

func (*ModelActivator) SetupWithManager added in v1.26.0

func (a *ModelActivator) SetupWithManager(mgr ctrl.Manager) error

SetupWithManager sets up the model activator with the given controller manager.

func (*ModelActivator) Start added in v1.26.0

func (a *ModelActivator) Start(ctx context.Context) error

Start starts the model activator.

type ModelCache added in v1.41.0

type ModelCache struct {
	// contains filtered or unexported fields
}

ModelCache is a cache for models.

func NewModelCache added in v1.41.0

func NewModelCache(modelGetter modelGetter) *ModelCache

NewModelCache creates a new model cache.

func (*ModelCache) GetModel added in v1.41.0

func (c *ModelCache) GetModel(ctx context.Context, in *mv1.GetModelRequest, opts ...grpc.CallOption) (*mv1.Model, error)

GetModel gets the model.

type NewCommonClientOptions added in v1.41.0

type NewCommonClientOptions struct {
	K8sClient client.Client
	Namespace string
	Owner     *metav1apply.OwnerReferenceApplyConfiguration
	Rconfig   *config.RuntimeConfig
	Mconfig   *config.ProcessedModelConfig

	EnableDriftedPodUpdate        bool
	EnableOverrideWithModelConfig bool
}

NewCommonClientOptions are options for creating a commonClient.

type OllamaManager added in v1.18.0

type OllamaManager struct {
	// contains filtered or unexported fields
}

OllamaManager manages multiple models in a single Ollama runtime.

func NewOllamaManager added in v1.18.0

func NewOllamaManager(
	k8sClient client.Client,
	ollamaClient Client,
	autoscaler autoscaler.Registerer,
	pullerAddr string,
) *OllamaManager

NewOllamaManager creates a new Ollama runtime manager.

func (*OllamaManager) BlacklistLLMAddress added in v1.29.0

func (m *OllamaManager) BlacklistLLMAddress(modelID, address string) error

BlacklistLLMAddress blacklists the address of the LLM for the given model.

func (*OllamaManager) DeleteModel added in v1.21.0

func (m *OllamaManager) DeleteModel(ctx context.Context, modelID string) error

DeleteModel deletes the model from the model manager.

func (*OllamaManager) GetLLMAddress added in v1.18.0

func (m *OllamaManager) GetLLMAddress(_ string) (string, error)

GetLLMAddress returns the address of the LLM. The model ID argument is ignored.

func (*OllamaManager) GetUpdateInProgressPodNames added in v1.37.0

func (m *OllamaManager) GetUpdateInProgressPodNames() map[string]struct{}

GetUpdateInProgressPodNames returns the names of pods that are currently in the process of updating.

func (*OllamaManager) ListModels added in v1.34.0

func (m *OllamaManager) ListModels() []*iv1.EngineStatus_Model

ListModels returns the list of models.

func (*OllamaManager) NeedLeaderElection added in v1.18.0

func (m *OllamaManager) NeedLeaderElection() bool

NeedLeaderElection implements LeaderElectionRunnable and always returns true.

func (*OllamaManager) PullModel added in v1.18.0

func (m *OllamaManager) PullModel(ctx context.Context, modelID string) error

PullModel pulls the model from the model manager.

func (*OllamaManager) PullModelUnblocked added in v1.33.0

func (m *OllamaManager) PullModelUnblocked(ctx context.Context, modelID string) error

PullModelUnblocked pulls the model from the model manager without waiting for its completion.

func (*OllamaManager) Reconcile added in v1.18.0

func (m *OllamaManager) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error)

Reconcile reconciles the runtime.

func (*OllamaManager) SetupWithManager added in v1.18.0

func (m *OllamaManager) SetupWithManager(mgr ctrl.Manager, leaderElection bool) error

SetupWithManager sets up the runtime manager with the given controller manager.

func (*OllamaManager) Start added in v1.18.0

func (m *OllamaManager) Start(ctx context.Context) error

Start deploys the Ollama runtime.

func (*OllamaManager) UpdateModel added in v1.41.0

func (m *OllamaManager) UpdateModel(ctx context.Context, modelID string) error

UpdateModel updates the model in the model manager.

type PodMonitor added in v1.39.0

type PodMonitor struct {
	// contains filtered or unexported fields
}

PodMonitor monitors the pods in the cluster.

func NewPodMonitor added in v1.39.0

func NewPodMonitor(
	k8sClient k8sclient.Client,
	clientset kubernetes.Interface,
) *PodMonitor

NewPodMonitor constructs a PodMonitor.

func (*PodMonitor) Reconcile added in v1.39.0

func (m *PodMonitor) Reconcile(
	ctx context.Context,
	req ctrl.Request,
) (ctrl.Result, error)

Reconcile updates the pods in the cluster.

func (*PodMonitor) SetupWithManager added in v1.39.0

func (m *PodMonitor) SetupWithManager(mgr ctrl.Manager) error

SetupWithManager sets up the pod monitor with the given controller manager.

type UpdateInProgressPodGetter added in v1.37.0

type UpdateInProgressPodGetter interface {
	GetUpdateInProgressPodNames() map[string]struct{}
}

UpdateInProgressPodGetter gets the names of pods that are currently being updated.

type Updater

type Updater struct {
	// contains filtered or unexported fields
}

Updater updates runtimes at startup.

func NewUpdater

func NewUpdater(
	namespace string,
	rtClientFactory ClientFactory,
	modelGetter modelGetter,
) *Updater

NewUpdater creates a new Updater.

TODO(kenji): Consider removing updater as ModelActivator now takes the responsibility of updating the runtime.

func (*Updater) NeedLeaderElection

func (u *Updater) NeedLeaderElection() bool

NeedLeaderElection implements LeaderElectionRunnable and always returns true.

func (*Updater) SetupWithManager

func (u *Updater) SetupWithManager(mgr ctrl.Manager) error

SetupWithManager sets up the updater with the manager.

func (*Updater) Start

func (u *Updater) Start(ctx context.Context) error

Start starts the updater.

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL