Documentation
¶
Index ¶
- Constants
- Variables
- func DefaultCostEstimator(prompt string, model string) float64
- func PrioritySchedulerWithClient(timeout time.Duration, logger *slog.Logger) (*PriorityScheduler, *PriorityClient)
- type AtomicRateLimitStats
- type BudgetAlert
- type BudgetAlertThreshold
- type BudgetClient
- func (b *BudgetClient) Analyze(ctx context.Context, prompt string, target any) error
- func (b *BudgetClient) Chat(ctx context.Context, prompt string) (string, error)
- func (b *BudgetClient) ChatStream(ctx context.Context, prompt string) (<-chan string, error)
- func (b *BudgetClient) GetTracker() *BudgetTracker
- func (b *BudgetClient) HealthCheck(ctx context.Context) HealthStatus
- type BudgetConfig
- type BudgetManager
- func (bm *BudgetManager) GetAllTrackers() map[string]*BudgetTracker
- func (bm *BudgetManager) GetGlobalStats() BudgetStats
- func (bm *BudgetManager) GetTracker(sessionID string) *BudgetTracker
- func (bm *BudgetManager) RemoveTracker(sessionID string)
- func (bm *BudgetManager) ResetAll()
- func (bm *BudgetManager) TrackRequest(sessionID string, cost float64) error
- type BudgetPeriod
- type BudgetStats
- type BudgetTracker
- func (bt *BudgetTracker) CheckAlerts()
- func (bt *BudgetTracker) CheckBudget(estimatedCost float64) (bool, float64, error)
- func (bt *BudgetTracker) GetRemaining() float64
- func (bt *BudgetTracker) GetStats() BudgetStats
- func (bt *BudgetTracker) IsExceeded() bool
- func (bt *BudgetTracker) Reset()
- func (bt *BudgetTracker) SetAlertCallback(callback func(alert BudgetAlert))
- func (bt *BudgetTracker) SetLimit(limit float64)
- func (bt *BudgetTracker) TrackRequest(cost float64) error
- type CacheEntry
- type CacheKey
- type CacheStats
- type CachedClient
- func (c *CachedClient) Analyze(ctx context.Context, prompt string, target any) error
- func (c *CachedClient) CacheStats() (keys int, hits int, misses int)
- func (c *CachedClient) Chat(ctx context.Context, prompt string) (string, error)
- func (c *CachedClient) ChatStream(ctx context.Context, prompt string) (<-chan string, error)
- func (c *CachedClient) ClearCache()
- func (c *CachedClient) HealthCheck(ctx context.Context) HealthStatus
- func (c *CachedClient) UnderlyingClient() LLMClient
- type CircuitBreaker
- func (cb *CircuitBreaker) Execute(ctx context.Context, fn func() error) error
- func (cb *CircuitBreaker) ExecuteWithResult(ctx context.Context, fn func() (interface{}, error)) (interface{}, error)
- func (cb *CircuitBreaker) ForceClose()
- func (cb *CircuitBreaker) ForceOpen()
- func (cb *CircuitBreaker) GetFailureRate() float64
- func (cb *CircuitBreaker) GetState() CircuitState
- func (cb *CircuitBreaker) GetStats() CircuitBreakerStats
- func (cb *CircuitBreaker) IsHealthy() bool
- func (cb *CircuitBreaker) Reset()
- type CircuitBreakerConfig
- type CircuitBreakerStats
- type CircuitClient
- func (c *CircuitClient) Analyze(ctx context.Context, prompt string, target any) error
- func (c *CircuitClient) Chat(ctx context.Context, prompt string) (string, error)
- func (c *CircuitClient) ChatStream(ctx context.Context, prompt string) (<-chan string, error)
- func (c *CircuitClient) Client() LLMClient
- func (c *CircuitClient) GetCircuitBreaker() *CircuitBreaker
- func (c *CircuitClient) HealthCheck(ctx context.Context) HealthStatus
- type CircuitState
- type ClientBuilder
- func (b *ClientBuilder) Build() (LLMClient, error)
- func (b *ClientBuilder) WithAPIKey(key string) *ClientBuilder
- func (b *ClientBuilder) WithCache(size ...int) *ClientBuilder
- func (b *ClientBuilder) WithCircuitBreaker(config ...CircuitBreakerConfig) *ClientBuilder
- func (b *ClientBuilder) WithEndpoint(endpoint string) *ClientBuilder
- func (b *ClientBuilder) WithLogger(logger *slog.Logger) *ClientBuilder
- func (b *ClientBuilder) WithMetrics(config ...MetricsConfig) *ClientBuilder
- func (b *ClientBuilder) WithModel(model string) *ClientBuilder
- func (b *ClientBuilder) WithRateLimit(rps float64) *ClientBuilder
- func (b *ClientBuilder) WithRateLimitConfig(rps float64, burst int) *ClientBuilder
- func (b *ClientBuilder) WithRetry(maxRetries int) *ClientBuilder
- func (b *ClientBuilder) WithRetryConfig(maxRetries, minWaitMs, maxWaitMs int) *ClientBuilder
- type ClientHealth
- type CostCalculator
- func (cc *CostCalculator) AddPricing(pricing ModelPricing)
- func (cc *CostCalculator) CalculateCost(modelName string, inputTokens, outputTokens int) (float64, error)
- func (cc *CostCalculator) CountTokens(text string) int
- func (cc *CostCalculator) CountTokensFromMessages(messages []openai.ChatCompletionMessage) int
- func (cc *CostCalculator) GetAllSessions() map[string]*SessionCost
- func (cc *CostCalculator) GetPricing(modelName string) (ModelPricing, bool)
- func (cc *CostCalculator) GetSessionCost(sessionID string) (*SessionCost, bool)
- func (cc *CostCalculator) GetTotalCost() float64
- func (cc *CostCalculator) ResetSession(sessionID string)
- func (cc *CostCalculator) SetSessionBudget(sessionID string, budget float64)
- func (cc *CostCalculator) TrackRequest(sessionID, modelName string, inputTokens, outputTokens int) (*SessionCost, float64, error)
- type CostEstimator
- type FailoverConfig
- type FailoverHistory
- type FailoverManager
- func (fm *FailoverManager) Close()
- func (fm *FailoverManager) ExecuteWithFailover(ctx context.Context, fn func(provider *ProviderConfig) error) error
- func (fm *FailoverManager) GetCurrentProvider() *ProviderConfig
- func (fm *FailoverManager) GetStats() FailoverStats
- func (fm *FailoverManager) ManualFailover(providerName string) error
- func (fm *FailoverManager) Reset()
- func (fm *FailoverManager) SetCurrentProvider(name string)
- func (fm *FailoverManager) SetLastFailoverTime(t time.Time)
- type FailoverRecord
- type FailoverStats
- type HealthChecker
- type HealthMonitor
- func (h *HealthMonitor) Analyze(ctx context.Context, prompt string, target any) error
- func (h *HealthMonitor) Chat(ctx context.Context, prompt string) (string, error)
- func (h *HealthMonitor) ChatStream(ctx context.Context, prompt string) (<-chan string, error)
- func (h *HealthMonitor) HealthCheck(ctx context.Context) HealthStatus
- func (h *HealthMonitor) IsHealthy() bool
- func (h *HealthMonitor) LastHealthCheck() time.Time
- type HealthStatus
- type LLMClient
- func DevelopmentClient(apiKey, model string) (LLMClient, error)
- func DevelopmentClientWithEndpoint(apiKey, endpoint, model string) (LLMClient, error)
- func HighThroughputClient(apiKey, model string) (LLMClient, error)
- func MinimalClient(apiKey, model string) (LLMClient, error)
- func ProductionClient(apiKey, model string) (LLMClient, error)
- func ProductionClientWithEndpoint(apiKey, endpoint, model string) (LLMClient, error)
- func ReliableClient(apiKey, model string) (LLMClient, error)
- func TestingClient(apiKey, model string) (LLMClient, error)
- type MetricsClient
- func (m *MetricsClient) Analyze(ctx context.Context, prompt string, target any) error
- func (m *MetricsClient) Chat(ctx context.Context, prompt string) (string, error)
- func (m *MetricsClient) ChatStream(ctx context.Context, prompt string) (<-chan string, error)
- func (m *MetricsClient) Client() LLMClient
- func (m *MetricsClient) GetMetrics() *MetricsCollector
- func (m *MetricsClient) HealthCheck(ctx context.Context) HealthStatus
- type MetricsCollector
- func (mc *MetricsCollector) EndRequest()
- func (mc *MetricsCollector) GetStats() MetricsStats
- func (mc *MetricsCollector) RecordRequest(model string, scenario string, inputTokens int64, outputTokens int64, ...)
- func (mc *MetricsCollector) RecordRoutingDecision(scenario Scenario, strategy RouteStrategy, model string)
- func (mc *MetricsCollector) StartRequest()
- type MetricsConfig
- type MetricsStats
- type ModelConfig
- type ModelPricing
- type ModelUsage
- type ObservableClient
- type ObservableClientImpl
- func (o *ObservableClientImpl) Analyze(ctx context.Context, prompt string, target any) error
- func (o *ObservableClientImpl) Chat(ctx context.Context, prompt string) (string, error)
- func (o *ObservableClientImpl) ChatStream(ctx context.Context, prompt string) (<-chan string, error)
- func (o *ObservableClientImpl) GetCacheStats() CacheStats
- func (o *ObservableClientImpl) GetCircuitBreakerStats() CircuitBreakerStats
- func (o *ObservableClientImpl) GetClientHealth(ctx context.Context) ClientHealth
- func (o *ObservableClientImpl) GetMetricsStats() MetricsStats
- func (o *ObservableClientImpl) GetRateLimitStats() RateLimitStats
- func (o *ObservableClientImpl) HealthCheck(ctx context.Context) HealthStatus
- type OpenAIClient
- func (c *OpenAIClient) Analyze(ctx context.Context, prompt string, target any) error
- func (c *OpenAIClient) Chat(ctx context.Context, prompt string) (string, error)
- func (c *OpenAIClient) ChatStream(ctx context.Context, prompt string) (<-chan string, error)
- func (c *OpenAIClient) HealthCheck(ctx context.Context) HealthStatus
- type Priority
- type PriorityClient
- func (pc *PriorityClient) ProcessAll(ctx context.Context) error
- func (pc *PriorityClient) ProcessNext(ctx context.Context) error
- func (pc *PriorityClient) Submit(ctx context.Context, id string, priority Priority, execute func() error) error
- func (pc *PriorityClient) SubmitWithResult(ctx context.Context, id string, priority Priority, execute func() error) (<-chan error, error)
- type PriorityConfig
- type PriorityQueue
- type PriorityRequest
- type PriorityScheduler
- func (ps *PriorityScheduler) Clear()
- func (ps *PriorityScheduler) Dequeue(ctx context.Context) (*PriorityRequest, error)
- func (ps *PriorityScheduler) Enqueue(ctx context.Context, id string, priority Priority, execute func() error, ...) error
- func (ps *PriorityScheduler) GetStats() PriorityStats
- func (ps *PriorityScheduler) Shutdown()
- func (ps *PriorityScheduler) Size() int
- func (ps *PriorityScheduler) TryDequeue() *PriorityRequest
- type PriorityStats
- type ProviderConfig
- type RateLimitConfig
- type RateLimitStats
- type RateLimitedClient
- func (c *RateLimitedClient) Analyze(ctx context.Context, prompt string, target any) error
- func (c *RateLimitedClient) Chat(ctx context.Context, prompt string) (string, error)
- func (c *RateLimitedClient) ChatStream(ctx context.Context, prompt string) (<-chan string, error)
- func (c *RateLimitedClient) Client() LLMClient
- func (c *RateLimitedClient) HealthCheck(ctx context.Context) HealthStatus
- func (c *RateLimitedClient) SetModel(model string)
- type RateLimiter
- func (rl *RateLimiter) Allow() bool
- func (rl *RateLimiter) Close()
- func (rl *RateLimiter) GetStats() RateLimitStats
- func (rl *RateLimiter) Remaining() int
- func (rl *RateLimiter) Reset()
- func (rl *RateLimiter) ResetStats()
- func (rl *RateLimiter) SetModelRate(model string, requestsPerSecond float64, burstSize int)
- func (rl *RateLimiter) SetRate(requestsPerSecond float64, burstSize int)
- func (rl *RateLimiter) TryWithRateLimit(ctx context.Context, model string, fn func() error) error
- func (rl *RateLimiter) Wait(ctx context.Context) error
- func (rl *RateLimiter) WaitModel(ctx context.Context, model string) error
- func (rl *RateLimiter) WithRateLimit(ctx context.Context, model string, fn func() error) error
- type RequestTimer
- type RetryClient
- func (r *RetryClient) Analyze(ctx context.Context, prompt string, target any) error
- func (r *RetryClient) Chat(ctx context.Context, prompt string) (string, error)
- func (r *RetryClient) ChatStream(ctx context.Context, prompt string) (<-chan string, error)
- func (r *RetryClient) HealthCheck(ctx context.Context) HealthStatus
- type RouteStrategy
- type Router
- func (r *Router) AddModel(model ModelConfig)
- func (r *Router) DetectScenario(prompt string) Scenario
- func (r *Router) GetDefaultStrategy() RouteStrategy
- func (r *Router) GetModels() []ModelConfig
- func (r *Router) RemoveModel(name string)
- func (r *Router) SelectModel(ctx context.Context, scenario Scenario, strategy RouteStrategy) (*ModelConfig, error)
- type RouterConfig
- type Scenario
- type SessionCost
- type TokenUsage
Constants ¶
const ( DefaultCacheSize = 1000 DefaultMaxRetries = 3 DefaultRetryMinWaitMs = 100 DefaultRetryMaxWaitMs = 5000 DefaultRPS = 10.0 DefaultMaxQueueSize = 100 DefaultQueueTimeout = 30 * time.Second DefaultMaxLatencySamples = 1000 )
Default configuration values.
Variables ¶
var ( ErrAPIKeyRequired = errors.New("API key is required") ErrModelRequired = errors.New("model is required") ErrInvalidEndpoint = errors.New("invalid endpoint URL") )
Static errors for validation.
Functions ¶
func DefaultCostEstimator ¶ added in v0.22.0
DefaultCostEstimator provides a simple cost estimation. Uses approximate pricing: $0.001 per 1K prompt tokens.
func PrioritySchedulerWithClient ¶ added in v0.22.0
func PrioritySchedulerWithClient(timeout time.Duration, logger *slog.Logger) (*PriorityScheduler, *PriorityClient)
PrioritySchedulerWithClient creates a priority scheduler and client for request prioritization. Returns both the scheduler (for workers) and client (for submitting requests).
Usage:
scheduler, client := llm.PrioritySchedulerWithClient(5 * time.Minute, nil)
// Submit high priority request
_ = client.Submit(ctx, "req-1", llm.PriorityHigh, func() error {
_, err := llmClient.Chat(ctx, prompt)
return err
})
// Worker processes requests
_ = client.ProcessNext(ctx)
Types ¶
type AtomicRateLimitStats ¶ added in v0.18.0
type AtomicRateLimitStats struct {
TotalRequests atomic.Int64
QueuedRequests atomic.Int64
RejectedRequests atomic.Int64
AvgWaitTimeMs atomic.Float64
}
AtomicRateLimitStats holds atomic stats for concurrent access.
type BudgetAlert ¶ added in v0.18.0
type BudgetAlert struct {
Timestamp time.Time
Period BudgetPeriod
CurrentCost float64
Limit float64
Percentage float64
Message string
IsHardLimit bool
}
BudgetAlert represents a budget alert event.
type BudgetAlertThreshold ¶ added in v0.18.0
type BudgetAlertThreshold struct {
// Percentage is the budget percentage (e.g., 80 for 80%).
Percentage float64
// Message is the alert message template.
Message string
}
BudgetAlertThreshold defines alert thresholds.
type BudgetClient ¶ added in v0.22.0
type BudgetClient struct {
// contains filtered or unexported fields
}
BudgetClient wraps an LLM client with budget tracking. It checks budget before each request and tracks costs after.
func BudgetClientWithTracker ¶ added in v0.22.0
func BudgetClientWithTracker(client LLMClient, tracker *BudgetTracker, model string, estimator CostEstimator) *BudgetClient
BudgetClientWithTracker creates a client with budget tracking. The BudgetTracker must be created separately and managed by the caller.
Usage:
tracker := llm.NewBudgetTracker(llm.BudgetConfig{
Period: llm.BudgetDaily,
Limit: 10.0, // $10 daily
EnableHardLimit: true,
}, "session-123")
client := llm.BudgetClientWithTracker(baseClient, tracker, "gpt-4", nil)
func NewBudgetClient ¶ added in v0.22.0
func NewBudgetClient(client LLMClient, tracker *BudgetTracker, model string, estimator CostEstimator) *BudgetClient
NewBudgetClient creates a new budget-aware client wrapper.
func NewBudgetManagedClient ¶ added in v0.22.0
func NewBudgetManagedClient(apiKey, model string, dailyLimit float64) (*BudgetClient, error)
NewBudgetManagedClient creates a client with budget tracking using default configuration. This is a convenience function for simple budget control.
Usage:
client, _ := llm.NewBudgetManagedClient(apiKey, "gpt-4", 10.0) // $10 budget
func (*BudgetClient) Analyze ¶ added in v0.22.0
Analyze implements the Analyze method with budget tracking.
func (*BudgetClient) ChatStream ¶ added in v0.22.0
ChatStream implements the ChatStream method with budget tracking.
func (*BudgetClient) GetTracker ¶ added in v0.22.0
func (b *BudgetClient) GetTracker() *BudgetTracker
GetTracker returns the underlying budget tracker.
func (*BudgetClient) HealthCheck ¶ added in v0.22.0
func (b *BudgetClient) HealthCheck(ctx context.Context) HealthStatus
HealthCheck delegates to the underlying client.
type BudgetConfig ¶ added in v0.18.0
type BudgetConfig struct {
// Period is the budget period (daily/weekly/monthly/session).
Period BudgetPeriod
// Limit is the budget limit in USD.
Limit float64
// AlertThresholds is the list of alert thresholds.
AlertThresholds []BudgetAlertThreshold
// EnableHardLimit enables hard limit (reject requests over budget).
EnableHardLimit bool
// EnableSoftLimit enables soft limit (warn but allow).
EnableSoftLimit bool
// Logger for budget events.
Logger *slog.Logger
}
BudgetConfig holds configuration for budget control.
func DefaultBudgetConfig ¶ added in v0.18.0
func DefaultBudgetConfig() BudgetConfig
DefaultBudgetConfig returns sensible defaults.
type BudgetManager ¶ added in v0.18.0
type BudgetManager struct {
// contains filtered or unexported fields
}
BudgetManager manages multiple budget trackers (e.g., per user/session).
func NewBudgetManager ¶ added in v0.18.0
func NewBudgetManager(config BudgetConfig) *BudgetManager
NewBudgetManager creates a new budget manager.
func (*BudgetManager) GetAllTrackers ¶ added in v0.18.0
func (bm *BudgetManager) GetAllTrackers() map[string]*BudgetTracker
GetAllTrackers returns all budget trackers.
func (*BudgetManager) GetGlobalStats ¶ added in v0.18.0
func (bm *BudgetManager) GetGlobalStats() BudgetStats
GetGlobalStats returns global budget statistics.
func (*BudgetManager) GetTracker ¶ added in v0.18.0
func (bm *BudgetManager) GetTracker(sessionID string) *BudgetTracker
GetTracker gets or creates a budget tracker for a session.
func (*BudgetManager) RemoveTracker ¶ added in v0.18.0
func (bm *BudgetManager) RemoveTracker(sessionID string)
RemoveTracker removes a budget tracker.
func (*BudgetManager) ResetAll ¶ added in v0.18.0
func (bm *BudgetManager) ResetAll()
ResetAll resets all budget trackers.
func (*BudgetManager) TrackRequest ¶ added in v0.18.0
func (bm *BudgetManager) TrackRequest(sessionID string, cost float64) error
TrackRequest tracks a request across all trackers.
type BudgetPeriod ¶ added in v0.18.0
type BudgetPeriod string
BudgetPeriod defines the budget period type.
const ( // BudgetDaily - Daily budget reset. BudgetDaily BudgetPeriod = "daily" // BudgetWeekly - Weekly budget reset (Monday). BudgetWeekly BudgetPeriod = "weekly" // BudgetMonthly - Monthly budget reset (1st). BudgetMonthly BudgetPeriod = "monthly" // BudgetSession - Session-based budget (no auto-reset). BudgetSession BudgetPeriod = "session" )
type BudgetStats ¶ added in v0.18.0
type BudgetStats struct {
CurrentCost float64
Limit float64
Remaining float64
PercentageUsed float64
RequestCount int64
PeriodStart time.Time
PeriodEnd time.Time
IsExceeded bool
AlertsTriggered []float64
}
BudgetStats holds budget statistics.
type BudgetTracker ¶ added in v0.18.0
type BudgetTracker struct {
// contains filtered or unexported fields
}
BudgetTracker tracks budget usage and enforces limits.
func NewBudgetTracker ¶ added in v0.18.0
func NewBudgetTracker(config BudgetConfig, sessionID string) *BudgetTracker
NewBudgetTracker creates a new budget tracker.
func (*BudgetTracker) CheckAlerts ¶ added in v0.18.0
func (bt *BudgetTracker) CheckAlerts()
CheckAlerts checks and triggers budget alerts (public for testing).
func (*BudgetTracker) CheckBudget ¶ added in v0.18.0
func (bt *BudgetTracker) CheckBudget(estimatedCost float64) (bool, float64, error)
CheckBudget checks if a request is within budget. Returns (allowed, cost, error).
func (*BudgetTracker) GetRemaining ¶ added in v0.18.0
func (bt *BudgetTracker) GetRemaining() float64
GetRemaining returns the remaining budget.
func (*BudgetTracker) GetStats ¶ added in v0.18.0
func (bt *BudgetTracker) GetStats() BudgetStats
GetStats returns budget statistics.
func (*BudgetTracker) IsExceeded ¶ added in v0.18.0
func (bt *BudgetTracker) IsExceeded() bool
IsExceeded returns true if budget is exceeded.
func (*BudgetTracker) Reset ¶ added in v0.18.0
func (bt *BudgetTracker) Reset()
Reset manually resets the budget tracker.
func (*BudgetTracker) SetAlertCallback ¶ added in v0.18.0
func (bt *BudgetTracker) SetAlertCallback(callback func(alert BudgetAlert))
SetAlertCallback sets a callback function for budget alerts.
func (*BudgetTracker) SetLimit ¶ added in v0.18.0
func (bt *BudgetTracker) SetLimit(limit float64)
SetLimit updates the budget limit.
func (*BudgetTracker) TrackRequest ¶ added in v0.18.0
func (bt *BudgetTracker) TrackRequest(cost float64) error
TrackRequest tracks a request's cost against the budget.
type CacheEntry ¶ added in v0.18.0
CacheEntry represents a cached response.
type CacheStats ¶ added in v0.22.0
CacheStats represents cache statistics.
type CachedClient ¶ added in v0.18.0
type CachedClient struct {
// contains filtered or unexported fields
}
CachedClient wraps an LLM client with LRU caching.
func NewCachedClient ¶ added in v0.18.0
func NewCachedClient(client LLMClient, cacheSize int) *CachedClient
NewCachedClient creates a new cached client wrapper. Set cacheSize to 0 to disable caching.
func (*CachedClient) CacheStats ¶ added in v0.18.0
func (c *CachedClient) CacheStats() (keys int, hits int, misses int)
CacheStats returns current cache statistics.
func (*CachedClient) ChatStream ¶ added in v0.18.0
ChatStream does not use caching (streams are not cacheable).
func (*CachedClient) ClearCache ¶ added in v0.18.0
func (c *CachedClient) ClearCache()
ClearCache clears all cached entries.
func (*CachedClient) HealthCheck ¶ added in v0.18.0
func (c *CachedClient) HealthCheck(ctx context.Context) HealthStatus
HealthCheck delegates to the underlying client.
func (*CachedClient) UnderlyingClient ¶ added in v0.22.0
func (c *CachedClient) UnderlyingClient() LLMClient
UnderlyingClient returns the underlying client. Used for component extraction in observable chains.
type CircuitBreaker ¶ added in v0.18.0
type CircuitBreaker struct {
// contains filtered or unexported fields
}
CircuitBreaker wraps gobreaker with additional HotPlex-specific features.
func NewCircuitBreaker ¶ added in v0.18.0
func NewCircuitBreaker(config CircuitBreakerConfig) *CircuitBreaker
NewCircuitBreaker creates a new circuit breaker.
func (*CircuitBreaker) Execute ¶ added in v0.18.0
func (cb *CircuitBreaker) Execute(ctx context.Context, fn func() error) error
Execute executes a function with circuit breaker protection.
func (*CircuitBreaker) ExecuteWithResult ¶ added in v0.18.0
func (cb *CircuitBreaker) ExecuteWithResult(ctx context.Context, fn func() (interface{}, error)) (interface{}, error)
ExecuteWithResult executes a function that returns a value with circuit breaker protection.
func (*CircuitBreaker) ForceClose ¶ added in v0.18.0
func (cb *CircuitBreaker) ForceClose()
ForceClose manually closes the circuit (override protection).
func (*CircuitBreaker) ForceOpen ¶ added in v0.18.0
func (cb *CircuitBreaker) ForceOpen()
ForceOpen manually opens the circuit (for maintenance/emergency).
func (*CircuitBreaker) GetFailureRate ¶ added in v0.18.0
func (cb *CircuitBreaker) GetFailureRate() float64
GetFailureRate returns the current failure rate (last interval).
func (*CircuitBreaker) GetState ¶ added in v0.18.0
func (cb *CircuitBreaker) GetState() CircuitState
GetState returns the current circuit state.
func (*CircuitBreaker) GetStats ¶ added in v0.18.0
func (cb *CircuitBreaker) GetStats() CircuitBreakerStats
GetStats returns circuit breaker statistics.
func (*CircuitBreaker) IsHealthy ¶ added in v0.18.0
func (cb *CircuitBreaker) IsHealthy() bool
IsHealthy returns true if circuit is closed or half-open.
func (*CircuitBreaker) Reset ¶ added in v0.18.0
func (cb *CircuitBreaker) Reset()
Reset manually resets the circuit breaker to closed state.
type CircuitBreakerConfig ¶ added in v0.18.0
type CircuitBreakerConfig struct {
// Name is the circuit breaker name (for logging/metrics).
Name string
// MaxFailures is the number of failures before opening circuit.
MaxFailures uint32
// Interval is the time window for counting failures.
Interval time.Duration
// Timeout is how long circuit stays open before half-open.
Timeout time.Duration
// HalfOpenMaxRequests is max requests allowed in half-open state.
HalfOpenMaxRequests uint32
// SuccessThreshold is successes needed in half-open to close.
SuccessThreshold uint32
// Logger for circuit state changes.
Logger *slog.Logger
}
CircuitBreakerConfig holds configuration for circuit breaker.
func DefaultCircuitBreakerConfig ¶ added in v0.18.0
func DefaultCircuitBreakerConfig() CircuitBreakerConfig
DefaultCircuitBreakerConfig returns sensible defaults.
type CircuitBreakerStats ¶ added in v0.18.0
type CircuitBreakerStats struct {
State CircuitState
TotalRequests uint64
SuccessRequests uint64
FailRequests uint64
CircuitOpens uint64
LastStateChange time.Time
IsForced bool
}
CircuitBreakerStats holds circuit breaker statistics.
type CircuitClient ¶ added in v0.22.0
type CircuitClient struct {
// contains filtered or unexported fields
}
CircuitClient wraps an LLM client with circuit breaker protection.
func NewCircuitClient ¶ added in v0.22.0
func NewCircuitClient(client LLMClient, breaker *CircuitBreaker) *CircuitClient
NewCircuitClient creates a new circuit breaker protected client wrapper.
func (*CircuitClient) Analyze ¶ added in v0.22.0
Analyze implements the Analyze method with circuit breaker protection.
func (*CircuitClient) Chat ¶ added in v0.22.0
Chat implements the Chat method with circuit breaker protection.
func (*CircuitClient) ChatStream ¶ added in v0.22.0
ChatStream implements the ChatStream method with circuit breaker protection.
func (*CircuitClient) Client ¶ added in v0.22.0
func (c *CircuitClient) Client() LLMClient
Client returns the underlying client for component extraction.
func (*CircuitClient) GetCircuitBreaker ¶ added in v0.22.0
func (c *CircuitClient) GetCircuitBreaker() *CircuitBreaker
GetCircuitBreaker returns the underlying circuit breaker for monitoring.
func (*CircuitClient) HealthCheck ¶ added in v0.22.0
func (c *CircuitClient) HealthCheck(ctx context.Context) HealthStatus
HealthCheck implements the HealthCheck method.
type CircuitState ¶ added in v0.18.0
type CircuitState string
CircuitState represents the state of a circuit breaker.
const ( // CircuitClosed - Normal operation, requests pass through. CircuitClosed CircuitState = "closed" // CircuitOpen - Circuit is tripped, requests fail fast. CircuitOpen CircuitState = "open" // CircuitHalfOpen - Testing if service has recovered. CircuitHalfOpen CircuitState = "half-open" )
type ClientBuilder ¶ added in v0.22.0
type ClientBuilder struct {
// contains filtered or unexported fields
}
ClientBuilder provides a fluent API for constructing LLM clients with various middleware layers (rate limiting, caching, circuit breaker, etc.).
func NewClientBuilder ¶ added in v0.22.0
func NewClientBuilder() *ClientBuilder
NewClientBuilder creates a new client builder with default logger.
func (*ClientBuilder) Build ¶ added in v0.22.0
func (b *ClientBuilder) Build() (LLMClient, error)
Build constructs the LLM client with all configured middleware layers. The wrapping order (from innermost to outermost):
- Base client (OpenAI)
- Cache (innermost - cache raw responses before retries)
- Retry (retry on transient failures)
- Rate Limiter (control request rate)
- Circuit Breaker (fail fast on repeated errors)
- Metrics (outermost - observe all operations)
func (*ClientBuilder) WithAPIKey ¶ added in v0.22.0
func (b *ClientBuilder) WithAPIKey(key string) *ClientBuilder
WithAPIKey sets the API key for the LLM provider.
func (*ClientBuilder) WithCache ¶ added in v0.22.0
func (b *ClientBuilder) WithCache(size ...int) *ClientBuilder
WithCache enables response caching with optional custom cache size.
func (*ClientBuilder) WithCircuitBreaker ¶ added in v0.22.0
func (b *ClientBuilder) WithCircuitBreaker(config ...CircuitBreakerConfig) *ClientBuilder
WithCircuitBreaker enables circuit breaker protection with optional custom configuration.
func (*ClientBuilder) WithEndpoint ¶ added in v0.22.0
func (b *ClientBuilder) WithEndpoint(endpoint string) *ClientBuilder
WithEndpoint sets the API endpoint (optional, for non-OpenAI providers).
func (*ClientBuilder) WithLogger ¶ added in v0.22.0
func (b *ClientBuilder) WithLogger(logger *slog.Logger) *ClientBuilder
WithLogger sets a custom logger.
func (*ClientBuilder) WithMetrics ¶ added in v0.22.0
func (b *ClientBuilder) WithMetrics(config ...MetricsConfig) *ClientBuilder
WithMetrics enables metrics collection with optional custom configuration.
func (*ClientBuilder) WithModel ¶ added in v0.22.0
func (b *ClientBuilder) WithModel(model string) *ClientBuilder
WithModel sets the model to use.
func (*ClientBuilder) WithRateLimit ¶ added in v0.22.0
func (b *ClientBuilder) WithRateLimit(rps float64) *ClientBuilder
WithRateLimit enables rate limiting with specified requests per second.
func (*ClientBuilder) WithRateLimitConfig ¶ added in v0.22.0
func (b *ClientBuilder) WithRateLimitConfig(rps float64, burst int) *ClientBuilder
WithRateLimitConfig enables rate limiting with custom burst size.
func (*ClientBuilder) WithRetry ¶ added in v0.22.0
func (b *ClientBuilder) WithRetry(maxRetries int) *ClientBuilder
WithRetry enables retry logic with specified max retries.
func (*ClientBuilder) WithRetryConfig ¶ added in v0.22.0
func (b *ClientBuilder) WithRetryConfig(maxRetries, minWaitMs, maxWaitMs int) *ClientBuilder
WithRetryConfig enables retry logic with custom wait times.
type ClientHealth ¶ added in v0.22.0
type ClientHealth struct {
Status HealthStatus
CircuitState CircuitState
CacheHitRate float64
RateLimitUsage float64
LastError string
LastErrorTime time.Time
TotalRequests int64
SuccessRate float64
AvgLatencyMs float64
}
ClientHealth represents the overall health of an LLM client.
type CostCalculator ¶ added in v0.18.0
type CostCalculator struct {
// contains filtered or unexported fields
}
CostCalculator calculates costs based on model pricing.
func NewCostCalculator ¶ added in v0.18.0
func NewCostCalculator() *CostCalculator
NewCostCalculator creates a new cost calculator.
func (*CostCalculator) AddPricing ¶ added in v0.18.0
func (cc *CostCalculator) AddPricing(pricing ModelPricing)
AddPricing adds or updates pricing for a model.
func (*CostCalculator) CalculateCost ¶ added in v0.18.0
func (cc *CostCalculator) CalculateCost(modelName string, inputTokens, outputTokens int) (float64, error)
CalculateCost calculates the cost for a request.
func (*CostCalculator) CountTokens ¶ added in v0.18.0
func (cc *CostCalculator) CountTokens(text string) int
CountTokens estimates token count from text. This is a simple approximation; for production use, consider using tiktoken.
func (*CostCalculator) CountTokensFromMessages ¶ added in v0.18.0
func (cc *CostCalculator) CountTokensFromMessages(messages []openai.ChatCompletionMessage) int
CountTokensFromMessages counts tokens from OpenAI messages.
func (*CostCalculator) GetAllSessions ¶ added in v0.18.0
func (cc *CostCalculator) GetAllSessions() map[string]*SessionCost
GetAllSessions returns all session costs.
func (*CostCalculator) GetPricing ¶ added in v0.18.0
func (cc *CostCalculator) GetPricing(modelName string) (ModelPricing, bool)
GetPricing returns pricing for a model.
func (*CostCalculator) GetSessionCost ¶ added in v0.18.0
func (cc *CostCalculator) GetSessionCost(sessionID string) (*SessionCost, bool)
GetSessionCost returns cost statistics for a session.
func (*CostCalculator) GetTotalCost ¶ added in v0.18.0
func (cc *CostCalculator) GetTotalCost() float64
GetTotalCost returns total cost across all sessions.
func (*CostCalculator) ResetSession ¶ added in v0.18.0
func (cc *CostCalculator) ResetSession(sessionID string)
ResetSession resets a session's cost tracking.
func (*CostCalculator) SetSessionBudget ¶ added in v0.18.0
func (cc *CostCalculator) SetSessionBudget(sessionID string, budget float64)
SetSessionBudget sets a budget limit for a session.
func (*CostCalculator) TrackRequest ¶ added in v0.18.0
func (cc *CostCalculator) TrackRequest(sessionID, modelName string, inputTokens, outputTokens int) (*SessionCost, float64, error)
TrackRequest tracks a request's cost for a session.
type CostEstimator ¶ added in v0.22.0
CostEstimator estimates the cost of a request.
type FailoverConfig ¶ added in v0.18.0
type FailoverConfig struct {
// Providers is the list of configured providers.
Providers []ProviderConfig
// EnableAutoFailover enables automatic failover on errors.
EnableAutoFailover bool
// EnableFailback enables automatic failback to primary.
EnableFailback bool
// FailbackCooldown is the wait time before attempting failback.
FailbackCooldown time.Duration
// HealthCheckInterval is how often to check provider health.
HealthCheckInterval time.Duration
// MaxFailoverAttempts is the maximum number of consecutive failovers before giving up.
MaxFailoverAttempts int
// Logger for failover events.
Logger *slog.Logger
// HistorySize is the number of failover records to keep.
HistorySize int
}
FailoverConfig holds configuration for multi-provider failover.
func DefaultFailoverConfig ¶ added in v0.18.0
func DefaultFailoverConfig() FailoverConfig
DefaultFailoverConfig returns sensible defaults.
type FailoverHistory ¶ added in v0.18.0
type FailoverHistory struct {
// contains filtered or unexported fields
}
FailoverHistory maintains a circular buffer of failover records.
func NewFailoverHistory ¶ added in v0.18.0
func NewFailoverHistory(size int) *FailoverHistory
NewFailoverHistory creates a new failover history buffer.
func (*FailoverHistory) Add ¶ added in v0.18.0
func (h *FailoverHistory) Add(record FailoverRecord)
Add adds a failover record.
func (*FailoverHistory) GetRecent ¶ added in v0.18.0
func (h *FailoverHistory) GetRecent(count int) []FailoverRecord
GetRecent returns the most recent failover records.
type FailoverManager ¶ added in v0.18.0
type FailoverManager struct {
// contains filtered or unexported fields
}
FailoverManager manages multi-provider failover logic.
func NewFailoverManager ¶ added in v0.18.0
func NewFailoverManager(config FailoverConfig) *FailoverManager
NewFailoverManager creates a new failover manager.
func (*FailoverManager) Close ¶ added in v0.18.0
func (fm *FailoverManager) Close()
Close stops the health check goroutine and cleans up resources. Should be called when the FailoverManager is no longer needed.
func (*FailoverManager) ExecuteWithFailover ¶ added in v0.18.0
func (fm *FailoverManager) ExecuteWithFailover(ctx context.Context, fn func(provider *ProviderConfig) error) error
ExecuteWithFailover executes a function with automatic failover.
func (*FailoverManager) GetCurrentProvider ¶ added in v0.18.0
func (fm *FailoverManager) GetCurrentProvider() *ProviderConfig
GetCurrentProvider returns the current active provider.
func (*FailoverManager) GetStats ¶ added in v0.18.0
func (fm *FailoverManager) GetStats() FailoverStats
GetStats returns failover statistics.
func (*FailoverManager) ManualFailover ¶ added in v0.18.0
func (fm *FailoverManager) ManualFailover(providerName string) error
ManualFailover manually switches to a specific provider.
func (*FailoverManager) Reset ¶ added in v0.18.0
func (fm *FailoverManager) Reset()
Reset resets failover state to initial configuration.
func (*FailoverManager) SetCurrentProvider ¶ added in v0.18.0
func (fm *FailoverManager) SetCurrentProvider(name string)
SetCurrentProvider sets the current provider (for testing).
func (*FailoverManager) SetLastFailoverTime ¶ added in v0.18.0
func (fm *FailoverManager) SetLastFailoverTime(t time.Time)
SetLastFailoverTime sets the last failover time (for testing).
type FailoverRecord ¶ added in v0.18.0
type FailoverRecord struct {
Timestamp time.Time
FromProvider string
ToProvider string
Reason string
Duration time.Duration
Success bool
}
FailoverRecord represents a failover event record.
type FailoverStats ¶ added in v0.18.0
type FailoverStats struct {
IsActive bool
CurrentProvider string
FailoverCount int32
LastFailoverTime time.Time
HealthyProviders []string
UnhealthyProviders []string
RecentFailovers []FailoverRecord
}
FailoverStats holds failover statistics.
type HealthChecker ¶ added in v0.18.0
type HealthChecker interface {
HealthCheck(ctx context.Context) HealthStatus
}
HealthChecker provides health check capability.
type HealthMonitor ¶ added in v0.18.0
type HealthMonitor struct {
// contains filtered or unexported fields
}
HealthMonitor wraps a client with health monitoring capabilities.
func NewHealthMonitor ¶ added in v0.18.0
func NewHealthMonitor(client interface {
Chat(ctx context.Context, prompt string) (string, error)
Analyze(ctx context.Context, prompt string, target any) error
ChatStream(ctx context.Context, prompt string) (<-chan string, error)
HealthCheck(ctx context.Context) HealthStatus
}, checkInterval time.Duration) *HealthMonitor
NewHealthMonitor creates a health monitor wrapper.
func (*HealthMonitor) ChatStream ¶ added in v0.18.0
ChatStream delegates to the underlying client.
func (*HealthMonitor) HealthCheck ¶ added in v0.18.0
func (h *HealthMonitor) HealthCheck(ctx context.Context) HealthStatus
HealthCheck performs a health check with optional caching.
func (*HealthMonitor) IsHealthy ¶ added in v0.18.0
func (h *HealthMonitor) IsHealthy() bool
IsHealthy returns true if the last health check passed.
func (*HealthMonitor) LastHealthCheck ¶ added in v0.18.0
func (h *HealthMonitor) LastHealthCheck() time.Time
LastHealthCheck returns the time of the last health check.
type HealthStatus ¶ added in v0.18.0
HealthStatus represents the health status of an LLM client. Exported for use by the brain package.
type LLMClient ¶ added in v0.22.0
type LLMClient interface {
Chat(ctx context.Context, prompt string) (string, error)
Analyze(ctx context.Context, prompt string, target any) error
ChatStream(ctx context.Context, prompt string) (<-chan string, error)
HealthCheck(ctx context.Context) HealthStatus
}
LLMClient defines the interface for LLM interactions. All client wrappers must implement this interface.
func DevelopmentClient ¶ added in v0.22.0
DevelopmentClient creates a development client with minimal overhead. Only metrics are enabled for debugging purposes.
func DevelopmentClientWithEndpoint ¶ added in v0.22.0
DevelopmentClientWithEndpoint creates a development client with custom endpoint.
func HighThroughputClient ¶ added in v0.22.0
HighThroughputClient creates a client optimized for high-throughput scenarios. Aggressive caching, high rate limits, and no retries for speed.
func MinimalClient ¶ added in v0.22.0
MinimalClient creates a bare-bones client with no middleware. Useful for simple use cases or custom configurations.
func ProductionClient ¶ added in v0.22.0
ProductionClient creates a production-ready LLM client with all capabilities enabled. Suitable for production workloads requiring reliability, observability, and protection.
func ProductionClientWithEndpoint ¶ added in v0.22.0
ProductionClientWithEndpoint creates a production-ready client with custom endpoint.
func ReliableClient ¶ added in v0.22.0
ReliableClient creates a client optimized for reliability. Aggressive retries, circuit breaker, and conservative rate limiting.
func TestingClient ¶ added in v0.22.0
TestingClient creates a client optimized for testing scenarios. Includes cache for deterministic responses and minimal rate limiting.
type MetricsClient ¶ added in v0.22.0
type MetricsClient struct {
// contains filtered or unexported fields
}
MetricsClient wraps an LLM client with metrics collection.
func NewMetricsClient ¶ added in v0.22.0
func NewMetricsClient(client LLMClient, metrics *MetricsCollector, model string) *MetricsClient
NewMetricsClient creates a new metrics-enabled client wrapper.
func (*MetricsClient) Analyze ¶ added in v0.22.0
Analyze implements the Analyze method with metrics collection.
func (*MetricsClient) Chat ¶ added in v0.22.0
Chat implements the Chat method with metrics collection.
func (*MetricsClient) ChatStream ¶ added in v0.22.0
ChatStream implements the ChatStream method with metrics collection.
func (*MetricsClient) Client ¶ added in v0.22.0
func (m *MetricsClient) Client() LLMClient
Client returns the underlying client for component extraction.
func (*MetricsClient) GetMetrics ¶ added in v0.22.0
func (m *MetricsClient) GetMetrics() *MetricsCollector
GetMetrics returns the underlying metrics collector for stats retrieval.
func (*MetricsClient) HealthCheck ¶ added in v0.22.0
func (m *MetricsClient) HealthCheck(ctx context.Context) HealthStatus
HealthCheck implements the HealthCheck method.
type MetricsCollector ¶ added in v0.18.0
type MetricsCollector struct {
// contains filtered or unexported fields
}
MetricsCollector collects and exports LLM metrics via OpenTelemetry.
func NewMetricsCollector ¶ added in v0.18.0
func NewMetricsCollector(config MetricsConfig) *MetricsCollector
NewMetricsCollector creates a new metrics collector.
func (*MetricsCollector) EndRequest ¶ added in v0.18.0
func (mc *MetricsCollector) EndRequest()
EndRequest decrements the active request counter.
func (*MetricsCollector) GetStats ¶ added in v0.18.0
func (mc *MetricsCollector) GetStats() MetricsStats
GetStats returns current metrics statistics.
func (*MetricsCollector) RecordRequest ¶ added in v0.18.0
func (mc *MetricsCollector) RecordRequest( model string, scenario string, inputTokens int64, outputTokens int64, cost float64, latencyMs float64, err error, )
RecordRequest records a completed request with metrics.
func (*MetricsCollector) RecordRoutingDecision ¶ added in v0.18.0
func (mc *MetricsCollector) RecordRoutingDecision(scenario Scenario, strategy RouteStrategy, model string)
RecordRoutingDecision records a routing decision.
func (*MetricsCollector) StartRequest ¶ added in v0.18.0
func (mc *MetricsCollector) StartRequest()
StartRequest increments the active request counter.
type MetricsConfig ¶ added in v0.18.0
type MetricsConfig struct {
// Enabled enables metrics collection.
Enabled bool
// ServiceName is the name for the metrics service.
ServiceName string
// MaxLatencySamples is the max number of latency samples to keep locally.
MaxLatencySamples int
}
MetricsConfig holds configuration for metrics collection.
func DefaultMetricsConfig ¶ added in v0.22.0
func DefaultMetricsConfig() MetricsConfig
DefaultMetricsConfig returns default metrics configuration.
type MetricsStats ¶ added in v0.18.0
type MetricsStats struct {
TotalRequests int64
TotalInputTokens int64
TotalOutputTokens int64
TotalCost float64
TotalErrors int64
ErrorRate float64
AvgLatencyMs float64
ActiveRequests int64
}
MetricsStats represents aggregated metrics statistics.
type ModelConfig ¶ added in v0.18.0
type ModelConfig struct {
// Name is the model identifier (e.g., "gpt-4o-mini", "qwen-plus").
Name string
// Provider is the provider name (e.g., "openai", "dashscope").
Provider string
// APIKey is the API key for this model.
APIKey string
// Endpoint is the optional custom endpoint.
Endpoint string
// CostPer1KInput is the cost per 1K input tokens (USD).
CostPer1KInput float64
// CostPer1KOutput is the cost per 1K output tokens (USD).
CostPer1KOutput float64
// AvgLatencyMs is the average latency in milliseconds.
AvgLatencyMs int64
// MaxTokens is the maximum context window size.
MaxTokens int
// Enabled indicates if this model is available for routing.
Enabled bool
}
ModelConfig represents configuration for a single model.
type ModelPricing ¶ added in v0.18.0
type ModelPricing struct {
ModelName string
Provider string
CostPer1KInput float64 // USD per 1K input tokens
CostPer1KOutput float64 // USD per 1K output tokens
CostPer1KCache float64 // USD per 1K cached input tokens (if applicable)
Currency string
EffectiveDate time.Time
}
ModelPricing represents pricing for a model.
func DefaultModelPricing ¶ added in v0.18.0
func DefaultModelPricing() []ModelPricing
DefaultModelPricing returns default pricing for common models.
type ModelUsage ¶ added in v0.18.0
type ModelUsage struct {
ModelName string
InputTokens int64
OutputTokens int64
Cost float64
RequestCount int64
}
ModelUsage tracks usage for a specific model within a session.
type ObservableClient ¶ added in v0.22.0
type ObservableClient interface {
LLMClient
// Component access
GetMetricsStats() MetricsStats
GetCircuitBreakerStats() CircuitBreakerStats
GetRateLimitStats() RateLimitStats
GetCacheStats() CacheStats
// Unified health check
GetClientHealth(ctx context.Context) ClientHealth
}
ObservableClient extends LLMClient with observability capabilities. It provides unified access to internal component statistics and health.
func AsObservable ¶ added in v0.22.0
func AsObservable(client LLMClient) ObservableClient
AsObservable attempts to cast an LLMClient to ObservableClient. Returns the ObservableClient if successful, or wraps it if not already observable.
func NewObservableClient ¶ added in v0.22.0
func NewObservableClient(client LLMClient) ObservableClient
NewObservableClient creates an observable client wrapper. It extracts internal components from the client chain.
type ObservableClientImpl ¶ added in v0.22.0
type ObservableClientImpl struct {
// contains filtered or unexported fields
}
ObservableClientImpl wraps an LLMClient with observability capabilities.
func (*ObservableClientImpl) ChatStream ¶ added in v0.22.0
func (o *ObservableClientImpl) ChatStream(ctx context.Context, prompt string) (<-chan string, error)
ChatStream delegates to the underlying client.
func (*ObservableClientImpl) GetCacheStats ¶ added in v0.22.0
func (o *ObservableClientImpl) GetCacheStats() CacheStats
GetCacheStats returns cache statistics if available.
func (*ObservableClientImpl) GetCircuitBreakerStats ¶ added in v0.22.0
func (o *ObservableClientImpl) GetCircuitBreakerStats() CircuitBreakerStats
GetCircuitBreakerStats returns circuit breaker statistics if available.
func (*ObservableClientImpl) GetClientHealth ¶ added in v0.22.0
func (o *ObservableClientImpl) GetClientHealth(ctx context.Context) ClientHealth
GetClientHealth returns comprehensive health information.
func (*ObservableClientImpl) GetMetricsStats ¶ added in v0.22.0
func (o *ObservableClientImpl) GetMetricsStats() MetricsStats
GetMetricsStats returns metrics statistics if available.
func (*ObservableClientImpl) GetRateLimitStats ¶ added in v0.22.0
func (o *ObservableClientImpl) GetRateLimitStats() RateLimitStats
GetRateLimitStats returns rate limit statistics if available.
func (*ObservableClientImpl) HealthCheck ¶ added in v0.22.0
func (o *ObservableClientImpl) HealthCheck(ctx context.Context) HealthStatus
HealthCheck delegates to the underlying client.
type OpenAIClient ¶
type OpenAIClient struct {
// contains filtered or unexported fields
}
OpenAIClient implements OpenAI-compatible LLM interactions. It can be used for OpenAI, DeepSeek, Groq, etc.
func NewOpenAIClient ¶
func NewOpenAIClient(apiKey, endpoint, model string, logger *slog.Logger) *OpenAIClient
NewOpenAIClient creates a new OpenAI compatible client.
func (*OpenAIClient) Analyze ¶
Analyze requests JSON formatted output from the model. It uses "ResponseFormat: {Type: JSON_OBJECT}" to ensure model compatibility for structured reasoning.
func (*OpenAIClient) ChatStream ¶ added in v0.18.0
ChatStream returns a channel that streams tokens as they are generated. The channel is closed when the stream completes or an error occurs.
func (*OpenAIClient) HealthCheck ¶ added in v0.18.0
func (c *OpenAIClient) HealthCheck(ctx context.Context) HealthStatus
HealthCheck performs a simple health check by making a minimal API call.
type PriorityClient ¶ added in v0.18.0
type PriorityClient struct {
// contains filtered or unexported fields
}
PriorityClient wraps a scheduler with execution capabilities.
func NewPriorityClient ¶ added in v0.18.0
func NewPriorityClient(scheduler *PriorityScheduler, timeout time.Duration, logger *slog.Logger) *PriorityClient
NewPriorityClient creates a new priority client.
func (*PriorityClient) ProcessAll ¶ added in v0.18.0
func (pc *PriorityClient) ProcessAll(ctx context.Context) error
ProcessAll processes all requests in the queue.
func (*PriorityClient) ProcessNext ¶ added in v0.18.0
func (pc *PriorityClient) ProcessNext(ctx context.Context) error
ProcessNext processes the next request in the queue.
func (*PriorityClient) Submit ¶ added in v0.18.0
func (pc *PriorityClient) Submit(ctx context.Context, id string, priority Priority, execute func() error) error
Submit submits a request for execution.
func (*PriorityClient) SubmitWithResult ¶ added in v0.18.0
func (pc *PriorityClient) SubmitWithResult(ctx context.Context, id string, priority Priority, execute func() error) (<-chan error, error)
SubmitWithResult submits a request and returns the result.
type PriorityConfig ¶ added in v0.18.0
type PriorityConfig struct {
// MaxQueueSize is the maximum queue size.
MaxQueueSize int
// EnableLowPriorityDrop enables dropping low priority requests when queue is full.
EnableLowPriorityDrop bool
// HighPriorityReserve reserves queue slots for high-priority requests.
HighPriorityReserve int
// MaxWaitTime is the maximum wait time for requests.
MaxWaitTime time.Duration
// Logger for queue events.
Logger *slog.Logger
}
PriorityConfig holds configuration for priority queue.
func DefaultPriorityConfig ¶ added in v0.18.0
func DefaultPriorityConfig() PriorityConfig
DefaultPriorityConfig returns sensible defaults.
type PriorityQueue ¶ added in v0.18.0
type PriorityQueue struct {
// contains filtered or unexported fields
}
PriorityQueue implements a priority queue for requests.
func (*PriorityQueue) Len ¶ added in v0.18.0
func (pq *PriorityQueue) Len() int
Len returns the number of items in the queue.
func (*PriorityQueue) Less ¶ added in v0.18.0
func (pq *PriorityQueue) Less(i, j int) bool
Less compares two items (lower priority value = higher priority).
func (*PriorityQueue) Peek ¶ added in v0.18.0
func (pq *PriorityQueue) Peek() *PriorityRequest
Peek returns the highest priority item without removing it.
func (*PriorityQueue) Pop ¶ added in v0.18.0
func (pq *PriorityQueue) Pop() interface{}
Pop removes and returns the highest priority item.
func (*PriorityQueue) Push ¶ added in v0.18.0
func (pq *PriorityQueue) Push(x interface{})
Push adds an item to the queue.
func (*PriorityQueue) Swap ¶ added in v0.18.0
func (pq *PriorityQueue) Swap(i, j int)
Swap swaps two items.
type PriorityRequest ¶ added in v0.18.0
type PriorityRequest struct {
ID string
Priority Priority
Context context.Context
Execute func() error
CreatedAt time.Time
Deadline time.Time
MaxWait time.Duration
// contains filtered or unexported fields
}
PriorityRequest represents a request in the priority queue.
type PriorityScheduler ¶ added in v0.18.0
type PriorityScheduler struct {
// contains filtered or unexported fields
}
PriorityScheduler manages priority-based request scheduling.
func NewPriorityScheduler ¶ added in v0.18.0
func NewPriorityScheduler(config PriorityConfig) *PriorityScheduler
NewPriorityScheduler creates a new priority scheduler.
func (*PriorityScheduler) Clear ¶ added in v0.18.0
func (ps *PriorityScheduler) Clear()
Clear clears all items from the queue.
func (*PriorityScheduler) Dequeue ¶ added in v0.18.0
func (ps *PriorityScheduler) Dequeue(ctx context.Context) (*PriorityRequest, error)
Dequeue removes and returns the highest priority request.
func (*PriorityScheduler) Enqueue ¶ added in v0.18.0
func (ps *PriorityScheduler) Enqueue(ctx context.Context, id string, priority Priority, execute func() error, maxWait time.Duration) error
Enqueue adds a request to the priority queue.
func (*PriorityScheduler) GetStats ¶ added in v0.18.0
func (ps *PriorityScheduler) GetStats() PriorityStats
GetStats returns scheduler statistics.
func (*PriorityScheduler) Shutdown ¶ added in v0.18.0
func (ps *PriorityScheduler) Shutdown()
Shutdown gracefully shuts down the scheduler.
func (*PriorityScheduler) Size ¶ added in v0.18.0
func (ps *PriorityScheduler) Size() int
Size returns the current queue size.
func (*PriorityScheduler) TryDequeue ¶ added in v0.18.0
func (ps *PriorityScheduler) TryDequeue() *PriorityRequest
TryDequeue tries to dequeue without blocking.
type PriorityStats ¶ added in v0.18.0
type PriorityStats struct {
QueueSize int32
MaxQueueSize int
Dropped int64
Processed int64
HighProcessed int64
MediumProcessed int64
LowProcessed int64
HighDropped int64
MediumDropped int64
LowDropped int64
IsShutdown bool
}
PriorityStats holds scheduler statistics.
type ProviderConfig ¶ added in v0.18.0
type ProviderConfig struct {
// Name is the provider identifier (e.g., "openai", "dashscope", "anthropic").
Name string
// APIKey is the API key for this provider.
APIKey string
// Endpoint is the optional custom endpoint.
Endpoint string
// Models is the list of models available from this provider.
Models []string
// Priority is the failover priority (1 = primary, higher = backup).
Priority int
// Enabled indicates if this provider is available.
Enabled bool
// Timeout is the request timeout for this provider.
Timeout time.Duration
// MaxRetries is the max retry attempts for this provider.
MaxRetries int
}
ProviderConfig represents configuration for an LLM provider.
type RateLimitConfig ¶ added in v0.18.0
type RateLimitConfig struct {
// RequestsPerSecond is the sustained request rate.
RequestsPerSecond float64
// BurstSize is the maximum burst size.
BurstSize int
// MaxQueueSize is the maximum number of waiting requests.
MaxQueueSize int
// QueueTimeout is the maximum time a request can wait in queue.
QueueTimeout time.Duration
// PerModel enables per-model rate limiting.
PerModel bool
}
RateLimitConfig holds configuration for rate limiting.
type RateLimitStats ¶ added in v0.18.0
type RateLimitStats struct {
TotalRequests int64
QueuedRequests int64
RejectedRequests int64
AvgWaitTimeMs float64
LastReset time.Time
}
RateLimitStats holds rate limiting statistics.
type RateLimitedClient ¶ added in v0.18.0
type RateLimitedClient struct {
// contains filtered or unexported fields
}
RateLimitedClient wraps a client with rate limiting.
func NewRateLimitedClient ¶ added in v0.18.0
func NewRateLimitedClient(client LLMClient, limiter *RateLimiter) *RateLimitedClient
NewRateLimitedClient creates a new rate-limited client wrapper.
func (*RateLimitedClient) ChatStream ¶ added in v0.18.0
ChatStream implements rate-limited streaming.
func (*RateLimitedClient) Client ¶ added in v0.22.0
func (c *RateLimitedClient) Client() LLMClient
Client returns the underlying client for component extraction.
func (*RateLimitedClient) HealthCheck ¶ added in v0.18.0
func (c *RateLimitedClient) HealthCheck(ctx context.Context) HealthStatus
HealthCheck implements rate-limited health check.
func (*RateLimitedClient) SetModel ¶ added in v0.18.0
func (c *RateLimitedClient) SetModel(model string)
SetModel sets the model for per-model rate limiting.
type RateLimiter ¶ added in v0.18.0
type RateLimiter struct {
// contains filtered or unexported fields
}
RateLimiter provides rate limiting with token bucket algorithm.
func NewRateLimiter ¶ added in v0.18.0
func NewRateLimiter(config RateLimitConfig) *RateLimiter
NewRateLimiter creates a new rate limiter.
func (*RateLimiter) Allow ¶ added in v0.18.0
func (rl *RateLimiter) Allow() bool
Allow checks if a request can proceed immediately.
func (*RateLimiter) Close ¶ added in v0.18.0
func (rl *RateLimiter) Close()
Close closes the rate limiter and stops queue processing.
func (*RateLimiter) GetStats ¶ added in v0.18.0
func (rl *RateLimiter) GetStats() RateLimitStats
GetStats returns current rate limiting statistics.
func (*RateLimiter) Remaining ¶ added in v0.18.0
func (rl *RateLimiter) Remaining() int
Remaining returns the number of requests remaining in the current burst.
func (*RateLimiter) Reset ¶ added in v0.18.0
func (rl *RateLimiter) Reset()
Reset resets the rate limiter immediately.
func (*RateLimiter) ResetStats ¶ added in v0.18.0
func (rl *RateLimiter) ResetStats()
ResetStats resets the statistics.
func (*RateLimiter) SetModelRate ¶ added in v0.18.0
func (rl *RateLimiter) SetModelRate(model string, requestsPerSecond float64, burstSize int)
SetModelRate sets rate limit for a specific model.
func (*RateLimiter) SetRate ¶ added in v0.18.0
func (rl *RateLimiter) SetRate(requestsPerSecond float64, burstSize int)
SetRate updates the rate limit dynamically.
func (*RateLimiter) TryWithRateLimit ¶ added in v0.18.0
TryWithRateLimit attempts to execute with rate limiting; it returns immediately if the request is rate limited.
func (*RateLimiter) Wait ¶ added in v0.18.0
func (rl *RateLimiter) Wait(ctx context.Context) error
Wait waits until a request can proceed or context is cancelled.
func (*RateLimiter) WaitModel ¶ added in v0.18.0
func (rl *RateLimiter) WaitModel(ctx context.Context, model string) error
WaitModel waits for a specific model's rate limit.
func (*RateLimiter) WithRateLimit ¶ added in v0.18.0
WithRateLimit wraps a function with rate limiting.
type RequestTimer ¶ added in v0.18.0
type RequestTimer struct {
// contains filtered or unexported fields
}
RequestTimer helps track request timing.
func NewRequestTimer ¶ added in v0.18.0
func NewRequestTimer(metrics *MetricsCollector, model, scenario string) *RequestTimer
NewRequestTimer creates a new request timer.
type RetryClient ¶ added in v0.18.0
type RetryClient struct {
// contains filtered or unexported fields
}
RetryClient wraps an LLM client with retry and backoff logic.
func NewRetryClient ¶ added in v0.18.0
func NewRetryClient(client LLMClient, maxRetries, retryMinWaitMs, retryMaxWaitMs int) *RetryClient
NewRetryClient creates a new retry client wrapper. Set maxRetries to 0 to disable retries.
func (*RetryClient) Analyze ¶ added in v0.18.0
Analyze implements the Analyze method with retry logic.
func (*RetryClient) ChatStream ¶ added in v0.18.0
ChatStream implements the ChatStream method with retry logic.
func (*RetryClient) HealthCheck ¶ added in v0.18.0
func (r *RetryClient) HealthCheck(ctx context.Context) HealthStatus
HealthCheck delegates to the underlying client.
type RouteStrategy ¶ added in v0.18.0
type RouteStrategy string
RouteStrategy defines the routing strategy for model selection.
const ( // StrategyCostPriority routes to the cheapest model that meets requirements. StrategyCostPriority RouteStrategy = "cost_priority" // StrategyLatencyPriority routes to the fastest model. StrategyLatencyPriority RouteStrategy = "latency_priority" // StrategyQualityPriority routes to the highest quality model. StrategyQualityPriority RouteStrategy = "quality_priority" // StrategyBalanced routes with balanced cost/quality tradeoff. StrategyBalanced RouteStrategy = "balanced" )
type Router ¶ added in v0.18.0
type Router struct {
// contains filtered or unexported fields
}
Router provides dynamic model routing based on scenario and strategy.
func NewRouter ¶ added in v0.18.0
func NewRouter(config RouterConfig, metrics *MetricsCollector) *Router
NewRouter creates a new model router.
func (*Router) AddModel ¶ added in v0.18.0
func (r *Router) AddModel(model ModelConfig)
AddModel dynamically adds a model to the router.
func (*Router) DetectScenario ¶ added in v0.18.0
DetectScenario infers the scenario from the prompt content.
func (*Router) GetDefaultStrategy ¶ added in v0.18.0
func (r *Router) GetDefaultStrategy() RouteStrategy
GetDefaultStrategy returns the default routing strategy.
func (*Router) GetModels ¶ added in v0.18.0
func (r *Router) GetModels() []ModelConfig
GetModels returns all configured models.
func (*Router) RemoveModel ¶ added in v0.18.0
RemoveModel removes a model by name.
func (*Router) SelectModel ¶ added in v0.18.0
func (r *Router) SelectModel(ctx context.Context, scenario Scenario, strategy RouteStrategy) (*ModelConfig, error)
SelectModel selects the best model for the given scenario and strategy.
type RouterConfig ¶ added in v0.18.0
type RouterConfig struct {
// DefaultStrategy is the default routing strategy.
DefaultStrategy RouteStrategy
// Models is the list of available models.
Models []ModelConfig
// ScenarioModelMap maps scenarios to preferred model names.
ScenarioModelMap map[Scenario]string
// FallbackModel is used when primary model fails.
FallbackModel string
// Logger for routing decisions.
Logger *slog.Logger
}
RouterConfig holds the configuration for the model router.
type Scenario ¶ added in v0.18.0
type Scenario string
Scenario defines the use case scenario for routing.
const ( // ScenarioChat is for conversational chat. ScenarioChat Scenario = "chat" // ScenarioAnalyze is for structured analysis/extraction. ScenarioAnalyze Scenario = "analyze" // ScenarioCode is for code generation/review. ScenarioCode Scenario = "code" // ScenarioReasoning is for complex reasoning tasks. ScenarioReasoning Scenario = "reasoning" )
type SessionCost ¶ added in v0.18.0
type SessionCost struct {
SessionID string
TotalInput int64
TotalOutput int64
TotalCost float64
RequestCount int64
FirstRequest time.Time
LastRequest time.Time
ModelBreakdown map[string]*ModelUsage
BudgetLimit float64 // Optional budget limit
BudgetAlerted bool // Whether budget alert has been triggered
}
SessionCost tracks costs for a single session.