controller

package
v0.0.4 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Nov 15, 2025 License: Apache-2.0 Imports: 3 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type Actuator

// Actuator publishes metrics describing a VariantAutoscaling so that
// external autoscalers can act on them.
type Actuator interface {
	// EmitMetrics publishes metrics for external autoscalers (e.g., HPA, KEDA).
	// This includes real-time current state and Inferno's optimization targets.
	//
	// ctx is the context of the call; va is the VariantAutoscaling whose
	// metrics are published. A non-nil error is returned on failure.
	EmitMetrics(
		ctx context.Context,
		va *llmdOptv1alpha1.VariantAutoscaling,
	) error
}

type ModelAcceleratorAllocation

// ModelAcceleratorAllocation holds the allocation details of an accelerator
// to a variant.
type ModelAcceleratorAllocation struct {
	Allocation *inferno.Allocation // allocation result of model analyzer

	// NOTE(review): RequiredPrefillQPS and RequiredDecodeQPS are presumably the
	// request rates (queries per second) required for the prefill and decode
	// phases, and Reason presumably a human-readable explanation of the
	// allocation outcome — confirm against the analyzer that populates them.
	RequiredPrefillQPS float64
	RequiredDecodeQPS  float64
	Reason             string
}

ModelAcceleratorAllocation holds the allocation details of an accelerator to a variant.

type ModelAnalyzeResponse

// ModelAnalyzeResponse captures the response from the ModelAnalyzer(s) for a
// single model.
type ModelAnalyzeResponse struct {
	// feasible allocations for all accelerators
	Allocations map[string]*ModelAcceleratorAllocation // accelerator name -> allocation
}

ModelAnalyzeResponse captures the response from the ModelAnalyzer(s) for a single model.

type ModelAnalyzer

// ModelAnalyzer defines the interface for model analysis.
type ModelAnalyzer interface {
	// AnalyzeModel analyzes the given VariantAutoscaling (passed by value) and
	// returns a ModelAnalyzeResponse, or an error.
	AnalyzeModel(
		ctx context.Context,
		va llmdOptv1alpha1.VariantAutoscaling,
	) (*ModelAnalyzeResponse, error)
}

ModelAnalyzer defines the interface for model analysis.

type PrometheusConfig

// PrometheusConfig holds the complete Prometheus client configuration,
// including TLS and authentication settings. Fields are (de)serialized as
// JSON using the keys given in the struct tags.
type PrometheusConfig struct {
	// BaseURL is the Prometheus server URL (must use https:// scheme)
	BaseURL string `json:"baseURL"`

	// TLS configuration fields (TLS is always enabled for HTTPS-only support)
	InsecureSkipVerify bool   `json:"insecureSkipVerify,omitempty"` // Skip certificate verification (development/testing only)
	CACertPath         string `json:"caCertPath,omitempty"`         // Path to CA certificate for server validation
	ClientCertPath     string `json:"clientCertPath,omitempty"`     // Path to client certificate for mutual TLS authentication
	ClientKeyPath      string `json:"clientKeyPath,omitempty"`      // Path to client private key for mutual TLS authentication
	ServerName         string `json:"serverName,omitempty"`         // Expected server name for SNI (Server Name Indication)

	// Authentication fields (BearerToken takes precedence over TokenPath)
	BearerToken string `json:"bearerToken,omitempty"` // Direct bearer token string (development/testing)
	TokenPath   string `json:"tokenPath,omitempty"`   // Path to file containing bearer token (production with mounted secrets)
}

PrometheusConfig holds the complete Prometheus client configuration, including TLS and authentication settings.

type ServiceClass

// ServiceClass describes a named service class with a priority and a list of
// per-model entries. Fields are mapped to YAML keys via the struct tags.
//
// NOTE(review): the ordering semantics of Priority (whether a lower or higher
// value wins) are not visible here — confirm against the consumer.
type ServiceClass struct {
	Name     string              `yaml:"name"`
	Priority int                 `yaml:"priority"`
	Data     []ServiceClassEntry `yaml:"data"`
}

type ServiceClassEntry

// ServiceClassEntry is one per-model entry of a ServiceClass. Fields are
// mapped to the YAML keys "model", "slo-tpot" and "slo-ttft".
//
// NOTE(review): SLOTPOT and SLOTTFT presumably hold the SLO targets for
// time-per-output-token and time-to-first-token; the units are not visible
// here — confirm against the code that reads them.
type ServiceClassEntry struct {
	Model   string `yaml:"model"`
	SLOTPOT int    `yaml:"slo-tpot"`
	SLOTTFT int    `yaml:"slo-ttft"`
}

type VariantAutoscalingsEngine

// VariantAutoscalingsEngine defines the interface for the optimization engine.
type VariantAutoscalingsEngine interface {
	// Optimize takes a list of VariantAutoscalings together with the per-key
	// analysis results and returns a map of optimized allocations, or an error.
	//
	// NOTE(review): the meaning of the string keys of the analysis map and of
	// the returned map is not visible here — confirm against an implementation.
	Optimize(
		ctx context.Context,
		va llmdOptv1alpha1.VariantAutoscalingList,
		analysis map[string]*ModelAnalyzeResponse,
	) (map[string]llmdOptv1alpha1.OptimizedAlloc, error)
}

VariantAutoscalingsEngine defines the interface for the optimization engine.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL