Documentation
¶
Index ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type Actuator ¶
// Actuator is the contract for components that publish autoscaling metrics
// derived from a VariantAutoscaling resource.
type Actuator interface {
	// EmitMetrics exports metrics meant for consumption by external
	// autoscalers such as HPA or KEDA. The published data covers both the
	// real-time current state and the optimization targets computed by
	// Inferno.
	//
	// Despite its plural name, VariantAutoscalings is a pointer to a single
	// VariantAutoscaling. The returned error reports whether emission failed.
	EmitMetrics(ctx context.Context, VariantAutoscalings *llmdOptv1alpha1.VariantAutoscaling) error
}
type ModelAcceleratorAllocation ¶
// ModelAcceleratorAllocation bundles the allocation computed by a model
// analyzer with two QPS requirement figures and a reason string.
type ModelAcceleratorAllocation struct {
	// Allocation is the allocation result produced by the model analyzer.
	Allocation *inferno.Allocation
	// RequiredPrefillQPS is presumably the request rate (queries per second)
	// needed for the prefill phase — TODO confirm exact semantics.
	RequiredPrefillQPS float64
	// RequiredDecodeQPS is presumably the request rate (queries per second)
	// needed for the decode phase — TODO confirm exact semantics.
	RequiredDecodeQPS float64
	// Reason is a free-form string.
	// NOTE(review): looks like a human-readable explanation of the
	// allocation outcome — confirm against the code that populates it.
	Reason string
}
ModelAcceleratorAllocation holds the details of allocating an accelerator to a variant.
type ModelAnalyzeResponse ¶
// ModelAnalyzeResponse carries the set of feasible allocations computed for
// a model.
type ModelAnalyzeResponse struct {
	// Allocations holds the feasible allocation for every accelerator,
	// keyed by accelerator name.
	Allocations map[string]*ModelAcceleratorAllocation
}
ModelAnalyzeResponse captures the response from the ModelAnalyzer(s) for a single model.
type ModelAnalyzer ¶
// ModelAnalyzer is the contract for components that perform model analysis.
type ModelAnalyzer interface {
	// AnalyzeModel analyzes the supplied VariantAutoscaling, which is passed
	// by value, and returns the resulting ModelAnalyzeResponse or an error.
	AnalyzeModel(ctx context.Context, va llmdOptv1alpha1.VariantAutoscaling) (*ModelAnalyzeResponse, error)
}
ModelAnalyzer defines the interface for model analysis.
type PrometheusConfig ¶
// PrometheusConfig describes how to connect to a Prometheus server: its URL,
// the TLS parameters, and the bearer-token authentication settings.
type PrometheusConfig struct {
	// BaseURL is the Prometheus server URL; it must use the https:// scheme.
	BaseURL string `json:"baseURL"`

	// TLS settings. TLS is always enabled because only HTTPS is supported.

	// InsecureSkipVerify skips certificate verification. Intended for
	// development and testing only.
	InsecureSkipVerify bool `json:"insecureSkipVerify,omitempty"`
	// CACertPath is the path to the CA certificate used to validate the
	// server.
	CACertPath string `json:"caCertPath,omitempty"`
	// ClientCertPath is the path to the client certificate used for mutual
	// TLS authentication.
	ClientCertPath string `json:"clientCertPath,omitempty"`
	// ClientKeyPath is the path to the client private key used for mutual
	// TLS authentication.
	ClientKeyPath string `json:"clientKeyPath,omitempty"`
	// ServerName is the expected server name for SNI (Server Name
	// Indication).
	ServerName string `json:"serverName,omitempty"`

	// Authentication settings. BearerToken takes precedence over TokenPath.

	// BearerToken is a bearer token supplied directly as a string. Intended
	// for development and testing.
	BearerToken string `json:"bearerToken,omitempty"`
	// TokenPath is the path to a file containing the bearer token. Intended
	// for production use with mounted secrets.
	TokenPath string `json:"tokenPath,omitempty"`
}
PrometheusConfig holds the complete Prometheus client configuration, including TLS and authentication settings.
type ServiceClass ¶
// ServiceClass groups a list of ServiceClassEntry values under a name and a
// priority. Every field carries a YAML tag.
type ServiceClass struct {
	// Name is stored under the YAML key "name".
	Name string `yaml:"name"`
	// Priority is stored under the YAML key "priority".
	// NOTE(review): whether a lower or a higher value wins is not visible
	// here — confirm against the code that consumes it.
	Priority int `yaml:"priority"`
	// Data lists the entries of this class, stored under the YAML key "data".
	Data []ServiceClassEntry `yaml:"data"`
}
type ServiceClassEntry ¶
type VariantAutoscalingsEngine ¶
// VariantAutoscalingsEngine is the contract implemented by the optimization
// engine.
type VariantAutoscalingsEngine interface {
	// Optimize receives a VariantAutoscalingList together with a map of
	// model analysis responses and returns a map of OptimizedAlloc values,
	// or an error.
	//
	// NOTE(review): what the string keys of the analysis map and of the
	// returned map identify is not visible here — confirm with the
	// implementation.
	Optimize(ctx context.Context, va llmdOptv1alpha1.VariantAutoscalingList, analysis map[string]*ModelAnalyzeResponse) (map[string]llmdOptv1alpha1.OptimizedAlloc, error)
}
VariantAutoscalingsEngine defines the interface for the optimization engine.
Click to show internal directories.
Click to hide internal directories.