runtime

package
v1.36.1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Aug 14, 2025 License: Apache-2.0 Imports: 45 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
var ErrRequestCanceled = errors.New("request is canceled")

ErrRequestCanceled is returned when the request is canceled.

Functions

This section is empty.

Types

type Client

type Client interface {
	GetName(modelID string) string
	GetAddress(name string) string
	DeployRuntime(ctx context.Context, modelID string, update bool) (*appsv1.StatefulSet, error)
	DeleteRuntime(ctx context.Context, name, modelID string) error

	RuntimeName() string
	Namespace() string
}

Client is the interface for managing runtimes.

func NewNIMClient added in v1.30.0

func NewNIMClient(
	k8sClient client.Client,
	namespace string,
	owner *metav1apply.OwnerReferenceApplyConfiguration,
	rconfig *config.RuntimeConfig,
	nconfig *config.NIMConfig,
	nmconfig *config.NIMModelConfig,
	modelGetter modelGetter,
) Client

NewNIMClient creates a new NIM runtime client.

func NewOllamaClient

func NewOllamaClient(
	k8sClient client.Client,
	namespace string,
	owner *metav1apply.OwnerReferenceApplyConfiguration,
	rconfig *config.RuntimeConfig,
	mconfig *config.ProcessedModelConfig,
	oconfig config.OllamaConfig,
	modelGetter modelGetter,
) Client

NewOllamaClient creates a new Ollama runtime client.

func NewTritonClient added in v0.378.0

func NewTritonClient(
	k8sClient client.Client,
	namespace string,
	owner *metav1apply.OwnerReferenceApplyConfiguration,
	rconfig *config.RuntimeConfig,
	mconfig *config.ProcessedModelConfig,
	modelGetter modelGetter,
) Client

NewTritonClient creates a new Triton runtime client.

func NewVLLMClient

func NewVLLMClient(
	k8sClient client.Client,
	namespace string,
	owner *metav1apply.OwnerReferenceApplyConfiguration,
	rconfig *config.RuntimeConfig,
	mconfig *config.ProcessedModelConfig,
	modelClient modelClient,
	vLLMConfg *config.VLLMConfig,
) Client

NewVLLMClient creates a new VLLM runtime client.

type ClientFactory

type ClientFactory interface {
	New(modelID string) (Client, error)
}

ClientFactory is the interface for creating a new Client given a model ID.

type LoRARebalancer added in v1.28.0

type LoRARebalancer struct {
	// contains filtered or unexported fields
}

LoRARebalancer is a controller that rebalances LoRA adapters across pods.

func NewLoRARebalancer added in v1.28.0

func NewLoRARebalancer(
	k8sClient k8sclient.Client,
	loraAdapterPullAndLoader loraAdapterPullAndLoader,
) *LoRARebalancer

NewLoRARebalancer creates a new LoRARebalancer.

func (*LoRARebalancer) NeedLeaderElection added in v1.28.0

func (r *LoRARebalancer) NeedLeaderElection() bool

NeedLeaderElection implements LeaderElectionRunnable and always returns true.

func (*LoRARebalancer) Reconcile added in v1.28.0

func (r *LoRARebalancer) Reconcile(
	ctx context.Context,
	req ctrl.Request,
) (ctrl.Result, error)

Reconcile updates the pods in the cluster.

func (*LoRARebalancer) Run added in v1.28.0

func (r *LoRARebalancer) Run(ctx context.Context, interval time.Duration) error

Run periodically checks the status of the pods and loaded LoRA adapters.

func (*LoRARebalancer) SetupWithManager added in v1.28.0

func (r *LoRARebalancer) SetupWithManager(mgr ctrl.Manager) error

SetupWithManager sets up the LoRA rebalancer with the given controller manager.

type LoRAReconciler added in v1.23.0

type LoRAReconciler struct {
	// contains filtered or unexported fields
}

LoRAReconciler reconciles the LoRA adapters loading status.

func NewLoRAReconciler added in v1.23.0

func NewLoRAReconciler(
	k8sClient k8sclient.Client,
	updateProcessor updateProcessor,
) *LoRAReconciler

NewLoRAReconciler creates a new LoRAReconciler.

func (*LoRAReconciler) Reconcile added in v1.23.0

func (r *LoRAReconciler) Reconcile(
	ctx context.Context,
	req ctrl.Request,
) (ctrl.Result, error)

Reconcile updates the pods in the cluster.

func (*LoRAReconciler) Run added in v1.23.0

func (r *LoRAReconciler) Run(ctx context.Context, interval time.Duration) error

Run periodically checks the status of the pods and loaded LoRA adapters.

func (*LoRAReconciler) SetupWithManager added in v1.23.0

func (r *LoRAReconciler) SetupWithManager(mgr ctrl.Manager) error

SetupWithManager sets up the LoRA reconciler with the given controller manager.

type Manager

type Manager struct {
	// contains filtered or unexported fields
}

Manager manages runtimes.

func NewManager

func NewManager(
	k8sClient client.Client,
	rtClientFactory ClientFactory,
	autoscaler autoscaler.Registerer,
	modelClient modelClient,
	enableDynamicLoRALoading bool,
	pullerPort int,
	nimModels map[string]bool,
) *Manager

NewManager creates a new runtime manager.

func (*Manager) BlacklistLLMAddress added in v1.29.0

func (m *Manager) BlacklistLLMAddress(modelID, address string) error

BlacklistLLMAddress blacklists the address of the LLM.

func (*Manager) DeleteModel added in v1.21.0

func (m *Manager) DeleteModel(ctx context.Context, modelID string) error

DeleteModel deletes the model from the model manager.

func (*Manager) GetLLMAddress

func (m *Manager) GetLLMAddress(modelID string) (string, error)

GetLLMAddress returns the address of the LLM.

func (*Manager) ListModels added in v1.34.0

func (m *Manager) ListModels() []*iv1.EngineStatus_Model

ListModels returns the list of models.

func (*Manager) PullModel

func (m *Manager) PullModel(ctx context.Context, modelID string) error

PullModel pulls the model from the model manager.

func (*Manager) PullModelUnblocked added in v1.33.0

func (m *Manager) PullModelUnblocked(ctx context.Context, modelID string) error

PullModelUnblocked pulls the model from the model manager without waiting for its completion.

func (*Manager) Reconcile

func (m *Manager) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error)

Reconcile reconciles the runtime.

func (*Manager) RunStateMachine added in v1.24.0

func (m *Manager) RunStateMachine(ctx context.Context) error

RunStateMachine runs the state machine for the manager.

func (*Manager) SetupWithManager

func (m *Manager) SetupWithManager(mgr ctrl.Manager, leaderElection bool) error

SetupWithManager sets up the runtime manager with the given controller manager.

type ModelActivator added in v1.26.0

type ModelActivator struct {
	// contains filtered or unexported fields
}

ModelActivator preloads models.

func NewModelActivator added in v1.26.0

func NewModelActivator(preloadedModelIDs []string, mmanager modelManager, modelLister modelLister) *ModelActivator

NewModelActivator creates a new ModelActivator.

func (*ModelActivator) NeedLeaderElection added in v1.26.0

func (a *ModelActivator) NeedLeaderElection() bool

NeedLeaderElection implements LeaderElectionRunnable and always returns true.

func (*ModelActivator) SetupWithManager added in v1.26.0

func (a *ModelActivator) SetupWithManager(mgr ctrl.Manager) error

SetupWithManager sets up the model activator with the Manager.

func (*ModelActivator) Start added in v1.26.0

func (a *ModelActivator) Start(ctx context.Context) error

Start starts the model activator.

type ModelPuller added in v1.18.0

type ModelPuller interface {
	PullModel(ctx context.Context, modelID string) error
}

ModelPuller pulls a model.

type OllamaManager added in v1.18.0

type OllamaManager struct {
	// contains filtered or unexported fields
}

OllamaManager manages multiple models in a single ollama runtime.

func NewOllamaManager added in v1.18.0

func NewOllamaManager(
	k8sClient client.Client,
	ollamaClient Client,
	autoscaler autoscaler.Registerer,
	pullerAddr string,
) *OllamaManager

NewOllamaManager creates a new ollama runtime manager.

func (*OllamaManager) BlacklistLLMAddress added in v1.29.0

func (m *OllamaManager) BlacklistLLMAddress(modelID, address string) error

BlacklistLLMAddress blacklists the address of the LLM for the given model.

func (*OllamaManager) DeleteModel added in v1.21.0

func (m *OllamaManager) DeleteModel(ctx context.Context, modelID string) error

DeleteModel deletes the model from the model manager.

func (*OllamaManager) GetLLMAddress added in v1.18.0

func (m *OllamaManager) GetLLMAddress(_ string) (string, error)

GetLLMAddress returns the address of the LLM for the given model.

func (*OllamaManager) ListModels added in v1.34.0

func (m *OllamaManager) ListModels() []*iv1.EngineStatus_Model

ListModels returns the list of models.

func (*OllamaManager) NeedLeaderElection added in v1.18.0

func (m *OllamaManager) NeedLeaderElection() bool

NeedLeaderElection implements LeaderElectionRunnable and always returns true.

func (*OllamaManager) PullModel added in v1.18.0

func (m *OllamaManager) PullModel(ctx context.Context, modelID string) error

PullModel pulls the model from the model manager.

func (*OllamaManager) PullModelUnblocked added in v1.33.0

func (m *OllamaManager) PullModelUnblocked(ctx context.Context, modelID string) error

PullModelUnblocked pulls the model from the model manager without waiting for its completion.

func (*OllamaManager) Reconcile added in v1.18.0

func (m *OllamaManager) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error)

Reconcile reconciles the runtime.

func (*OllamaManager) SetupWithManager added in v1.18.0

func (m *OllamaManager) SetupWithManager(mgr ctrl.Manager, leaderElection bool) error

SetupWithManager sets up the runtime manager with the given controller manager.

func (*OllamaManager) Start added in v1.18.0

func (m *OllamaManager) Start(ctx context.Context) error

Start deploys the ollama runtime.

type Preloader

type Preloader struct {
	// contains filtered or unexported fields
}

Preloader preloads models.

func NewPreloader

func NewPreloader(puller ModelPuller, ids []string, modelClient modelGetter) *Preloader

NewPreloader creates a new Preloader.

func (*Preloader) NeedLeaderElection

func (p *Preloader) NeedLeaderElection() bool

NeedLeaderElection implements LeaderElectionRunnable and always returns true.

func (*Preloader) SetupWithManager

func (p *Preloader) SetupWithManager(mgr ctrl.Manager) error

SetupWithManager sets up the preloader with the Manager.

func (*Preloader) Start

func (p *Preloader) Start(ctx context.Context) error

Start starts the preloader.

type Updater

type Updater struct {
	// contains filtered or unexported fields
}

Updater updates runtimes at startup.

func NewUpdater

func NewUpdater(namespace string, rtClientFactory ClientFactory) *Updater

NewUpdater creates a new Updater.

func (*Updater) NeedLeaderElection

func (u *Updater) NeedLeaderElection() bool

NeedLeaderElection implements LeaderElectionRunnable and always returns true.

func (*Updater) SetupWithManager

func (u *Updater) SetupWithManager(mgr ctrl.Manager) error

SetupWithManager sets up the updater with the manager.

func (*Updater) Start

func (u *Updater) Start(ctx context.Context) error

Start starts the updater.

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL