Documentation
¶
Index ¶
- type AtomicRateLimitStats
- type BudgetAlert
- type BudgetAlertThreshold
- type BudgetConfig
- type BudgetManager
- func (bm *BudgetManager) GetAllTrackers() map[string]*BudgetTracker
- func (bm *BudgetManager) GetGlobalStats() BudgetStats
- func (bm *BudgetManager) GetTracker(sessionID string) *BudgetTracker
- func (bm *BudgetManager) RemoveTracker(sessionID string)
- func (bm *BudgetManager) ResetAll()
- func (bm *BudgetManager) TrackRequest(sessionID string, cost float64) error
- type BudgetPeriod
- type BudgetStats
- type BudgetTracker
- func (bt *BudgetTracker) CheckAlerts()
- func (bt *BudgetTracker) CheckBudget(estimatedCost float64) (bool, float64, error)
- func (bt *BudgetTracker) GetRemaining() float64
- func (bt *BudgetTracker) GetStats() BudgetStats
- func (bt *BudgetTracker) IsExceeded() bool
- func (bt *BudgetTracker) Reset()
- func (bt *BudgetTracker) SetAlertCallback(callback func(alert BudgetAlert))
- func (bt *BudgetTracker) SetLimit(limit float64)
- func (bt *BudgetTracker) TrackRequest(cost float64) error
- type CacheEntry
- type CacheKey
- type CachedClient
- func (c *CachedClient) Analyze(ctx context.Context, prompt string, target any) error
- func (c *CachedClient) CacheStats() (keys int, hits int, misses int)
- func (c *CachedClient) Chat(ctx context.Context, prompt string) (string, error)
- func (c *CachedClient) ChatStream(ctx context.Context, prompt string) (<-chan string, error)
- func (c *CachedClient) ClearCache()
- func (c *CachedClient) HealthCheck(ctx context.Context) HealthStatus
- type CircuitBreaker
- func (cb *CircuitBreaker) Execute(ctx context.Context, fn func() error) error
- func (cb *CircuitBreaker) ExecuteWithResult(ctx context.Context, fn func() (interface{}, error)) (interface{}, error)
- func (cb *CircuitBreaker) ForceClose()
- func (cb *CircuitBreaker) ForceOpen()
- func (cb *CircuitBreaker) GetFailureRate() float64
- func (cb *CircuitBreaker) GetState() CircuitState
- func (cb *CircuitBreaker) GetStats() CircuitBreakerStats
- func (cb *CircuitBreaker) IsHealthy() bool
- func (cb *CircuitBreaker) Reset()
- type CircuitBreakerConfig
- type CircuitBreakerStats
- type CircuitState
- type CostCalculator
- func (cc *CostCalculator) AddPricing(pricing ModelPricing)
- func (cc *CostCalculator) CalculateCost(modelName string, inputTokens, outputTokens int) (float64, error)
- func (cc *CostCalculator) CountTokens(text string) int
- func (cc *CostCalculator) CountTokensFromMessages(messages []openai.ChatCompletionMessage) int
- func (cc *CostCalculator) GetAllSessions() map[string]*SessionCost
- func (cc *CostCalculator) GetPricing(modelName string) (ModelPricing, bool)
- func (cc *CostCalculator) GetSessionCost(sessionID string) (*SessionCost, bool)
- func (cc *CostCalculator) GetTotalCost() float64
- func (cc *CostCalculator) ResetSession(sessionID string)
- func (cc *CostCalculator) SetSessionBudget(sessionID string, budget float64)
- func (cc *CostCalculator) TrackRequest(sessionID, modelName string, inputTokens, outputTokens int) (*SessionCost, float64, error)
- type FailoverConfig
- type FailoverHistory
- type FailoverManager
- func (fm *FailoverManager) Close()
- func (fm *FailoverManager) ExecuteWithFailover(ctx context.Context, fn func(provider *ProviderConfig) error) error
- func (fm *FailoverManager) GetCurrentProvider() *ProviderConfig
- func (fm *FailoverManager) GetStats() FailoverStats
- func (fm *FailoverManager) ManualFailover(providerName string) error
- func (fm *FailoverManager) Reset()
- func (fm *FailoverManager) SetCurrentProvider(name string)
- func (fm *FailoverManager) SetLastFailoverTime(t time.Time)
- type FailoverRecord
- type FailoverStats
- type HealthChecker
- type HealthMonitor
- func (h *HealthMonitor) Analyze(ctx context.Context, prompt string, target any) error
- func (h *HealthMonitor) Chat(ctx context.Context, prompt string) (string, error)
- func (h *HealthMonitor) ChatStream(ctx context.Context, prompt string) (<-chan string, error)
- func (h *HealthMonitor) HealthCheck(ctx context.Context) HealthStatus
- func (h *HealthMonitor) IsHealthy() bool
- func (h *HealthMonitor) LastHealthCheck() time.Time
- type HealthStatus
- type MetricsCollector
- func (mc *MetricsCollector) EndRequest()
- func (mc *MetricsCollector) GetStats() MetricsStats
- func (mc *MetricsCollector) RecordRequest(model string, scenario string, inputTokens int64, outputTokens int64, ...)
- func (mc *MetricsCollector) RecordRoutingDecision(scenario Scenario, strategy RouteStrategy, model string)
- func (mc *MetricsCollector) StartRequest()
- type MetricsConfig
- type MetricsStats
- type ModelConfig
- type ModelPricing
- type ModelUsage
- type OpenAIClient
- func (c *OpenAIClient) Analyze(ctx context.Context, prompt string, target any) error
- func (c *OpenAIClient) Chat(ctx context.Context, prompt string) (string, error)
- func (c *OpenAIClient) ChatStream(ctx context.Context, prompt string) (<-chan string, error)
- func (c *OpenAIClient) HealthCheck(ctx context.Context) HealthStatus
- type Priority
- type PriorityClient
- func (pc *PriorityClient) ProcessAll(ctx context.Context) error
- func (pc *PriorityClient) ProcessNext(ctx context.Context) error
- func (pc *PriorityClient) Submit(ctx context.Context, id string, priority Priority, execute func() error) error
- func (pc *PriorityClient) SubmitWithResult(ctx context.Context, id string, priority Priority, execute func() error) (<-chan error, error)
- type PriorityConfig
- type PriorityQueue
- type PriorityRequest
- type PriorityScheduler
- func (ps *PriorityScheduler) Clear()
- func (ps *PriorityScheduler) Dequeue(ctx context.Context) (*PriorityRequest, error)
- func (ps *PriorityScheduler) Enqueue(ctx context.Context, id string, priority Priority, execute func() error, ...) error
- func (ps *PriorityScheduler) GetStats() PriorityStats
- func (ps *PriorityScheduler) Shutdown()
- func (ps *PriorityScheduler) Size() int
- func (ps *PriorityScheduler) TryDequeue() *PriorityRequest
- type PriorityStats
- type ProviderConfig
- type RateLimitConfig
- type RateLimitStats
- type RateLimitedClient
- func (c *RateLimitedClient) Analyze(ctx context.Context, prompt string, target any) error
- func (c *RateLimitedClient) Chat(ctx context.Context, prompt string) (string, error)
- func (c *RateLimitedClient) ChatStream(ctx context.Context, prompt string) (<-chan string, error)
- func (c *RateLimitedClient) HealthCheck(ctx context.Context) HealthStatus
- func (c *RateLimitedClient) SetModel(model string)
- type RateLimiter
- func (rl *RateLimiter) Allow() bool
- func (rl *RateLimiter) Close()
- func (rl *RateLimiter) GetStats() RateLimitStats
- func (rl *RateLimiter) Remaining() int
- func (rl *RateLimiter) Reset()
- func (rl *RateLimiter) ResetStats()
- func (rl *RateLimiter) SetModelRate(model string, requestsPerSecond float64, burstSize int)
- func (rl *RateLimiter) SetRate(requestsPerSecond float64, burstSize int)
- func (rl *RateLimiter) TryWithRateLimit(ctx context.Context, model string, fn func() error) error
- func (rl *RateLimiter) Wait(ctx context.Context) error
- func (rl *RateLimiter) WaitModel(ctx context.Context, model string) error
- func (rl *RateLimiter) WithRateLimit(ctx context.Context, model string, fn func() error) error
- type RequestTimer
- type RetryClient
- func (r *RetryClient) Analyze(ctx context.Context, prompt string, target any) error
- func (r *RetryClient) Chat(ctx context.Context, prompt string) (string, error)
- func (r *RetryClient) ChatStream(ctx context.Context, prompt string) (<-chan string, error)
- func (r *RetryClient) HealthCheck(ctx context.Context) HealthStatus
- type RouteStrategy
- type Router
- func (r *Router) AddModel(model ModelConfig)
- func (r *Router) DetectScenario(prompt string) Scenario
- func (r *Router) GetDefaultStrategy() RouteStrategy
- func (r *Router) GetModels() []ModelConfig
- func (r *Router) RemoveModel(name string)
- func (r *Router) SelectModel(ctx context.Context, scenario Scenario, strategy RouteStrategy) (*ModelConfig, error)
- type RouterConfig
- type Scenario
- type SessionCost
- type TokenUsage
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type AtomicRateLimitStats ¶ added in v0.18.0
type AtomicRateLimitStats struct {
TotalRequests atomic.Int64
QueuedRequests atomic.Int64
RejectedRequests atomic.Int64
AvgWaitTimeMs atomic.Float64
}
AtomicRateLimitStats holds atomic stats for concurrent access.
type BudgetAlert ¶ added in v0.18.0
type BudgetAlert struct {
Timestamp time.Time
Period BudgetPeriod
CurrentCost float64
Limit float64
Percentage float64
Message string
IsHardLimit bool
}
BudgetAlert represents a budget alert event.
type BudgetAlertThreshold ¶ added in v0.18.0
type BudgetAlertThreshold struct {
// Percentage is the budget percentage (e.g., 80 for 80%).
Percentage float64
// Message is the alert message template.
Message string
}
BudgetAlertThreshold defines alert thresholds.
type BudgetConfig ¶ added in v0.18.0
type BudgetConfig struct {
// Period is the budget period (daily/weekly/monthly/session).
Period BudgetPeriod
// Limit is the budget limit in USD.
Limit float64
// AlertThresholds is the list of alert thresholds.
AlertThresholds []BudgetAlertThreshold
// EnableHardLimit enables hard limit (reject requests over budget).
EnableHardLimit bool
// EnableSoftLimit enables soft limit (warn but allow).
EnableSoftLimit bool
// Logger for budget events.
Logger *slog.Logger
}
BudgetConfig holds configuration for budget control.
func DefaultBudgetConfig ¶ added in v0.18.0
func DefaultBudgetConfig() BudgetConfig
DefaultBudgetConfig returns sensible defaults.
type BudgetManager ¶ added in v0.18.0
type BudgetManager struct {
// contains filtered or unexported fields
}
BudgetManager manages multiple budget trackers (e.g., per user/session).
func NewBudgetManager ¶ added in v0.18.0
func NewBudgetManager(config BudgetConfig) *BudgetManager
NewBudgetManager creates a new budget manager.
func (*BudgetManager) GetAllTrackers ¶ added in v0.18.0
func (bm *BudgetManager) GetAllTrackers() map[string]*BudgetTracker
GetAllTrackers returns all budget trackers.
func (*BudgetManager) GetGlobalStats ¶ added in v0.18.0
func (bm *BudgetManager) GetGlobalStats() BudgetStats
GetGlobalStats returns global budget statistics.
func (*BudgetManager) GetTracker ¶ added in v0.18.0
func (bm *BudgetManager) GetTracker(sessionID string) *BudgetTracker
GetTracker gets or creates a budget tracker for a session.
func (*BudgetManager) RemoveTracker ¶ added in v0.18.0
func (bm *BudgetManager) RemoveTracker(sessionID string)
RemoveTracker removes a budget tracker.
func (*BudgetManager) ResetAll ¶ added in v0.18.0
func (bm *BudgetManager) ResetAll()
ResetAll resets all budget trackers.
func (*BudgetManager) TrackRequest ¶ added in v0.18.0
func (bm *BudgetManager) TrackRequest(sessionID string, cost float64) error
TrackRequest tracks a request across all trackers.
type BudgetPeriod ¶ added in v0.18.0
type BudgetPeriod string
BudgetPeriod defines the budget period type.
const (
// BudgetDaily - Daily budget reset.
BudgetDaily BudgetPeriod = "daily"
// BudgetWeekly - Weekly budget reset (Monday).
BudgetWeekly BudgetPeriod = "weekly"
// BudgetMonthly - Monthly budget reset (1st).
BudgetMonthly BudgetPeriod = "monthly"
// BudgetSession - Session-based budget (no auto-reset).
BudgetSession BudgetPeriod = "session"
)
type BudgetStats ¶ added in v0.18.0
type BudgetStats struct {
CurrentCost float64
Limit float64
Remaining float64
PercentageUsed float64
RequestCount int64
PeriodStart time.Time
PeriodEnd time.Time
IsExceeded bool
AlertsTriggered []float64
}
BudgetStats holds budget statistics.
type BudgetTracker ¶ added in v0.18.0
type BudgetTracker struct {
// contains filtered or unexported fields
}
BudgetTracker tracks budget usage and enforces limits.
func NewBudgetTracker ¶ added in v0.18.0
func NewBudgetTracker(config BudgetConfig, sessionID string) *BudgetTracker
NewBudgetTracker creates a new budget tracker.
func (*BudgetTracker) CheckAlerts ¶ added in v0.18.0
func (bt *BudgetTracker) CheckAlerts()
CheckAlerts checks and triggers budget alerts (public for testing).
func (*BudgetTracker) CheckBudget ¶ added in v0.18.0
func (bt *BudgetTracker) CheckBudget(estimatedCost float64) (bool, float64, error)
CheckBudget checks if a request is within budget. Returns (allowed, cost, error).
func (*BudgetTracker) GetRemaining ¶ added in v0.18.0
func (bt *BudgetTracker) GetRemaining() float64
GetRemaining returns the remaining budget.
func (*BudgetTracker) GetStats ¶ added in v0.18.0
func (bt *BudgetTracker) GetStats() BudgetStats
GetStats returns budget statistics.
func (*BudgetTracker) IsExceeded ¶ added in v0.18.0
func (bt *BudgetTracker) IsExceeded() bool
IsExceeded returns true if budget is exceeded.
func (*BudgetTracker) Reset ¶ added in v0.18.0
func (bt *BudgetTracker) Reset()
Reset manually resets the budget tracker.
func (*BudgetTracker) SetAlertCallback ¶ added in v0.18.0
func (bt *BudgetTracker) SetAlertCallback(callback func(alert BudgetAlert))
SetAlertCallback sets a callback function for budget alerts.
func (*BudgetTracker) SetLimit ¶ added in v0.18.0
func (bt *BudgetTracker) SetLimit(limit float64)
SetLimit updates the budget limit.
func (*BudgetTracker) TrackRequest ¶ added in v0.18.0
func (bt *BudgetTracker) TrackRequest(cost float64) error
TrackRequest tracks a request's cost against the budget.
type CacheEntry ¶ added in v0.18.0
CacheEntry represents a cached response.
type CachedClient ¶ added in v0.18.0
type CachedClient struct {
// contains filtered or unexported fields
}
CachedClient wraps an LLM client with LRU caching.
func NewCachedClient ¶ added in v0.18.0
func NewCachedClient(client interface {
Chat(ctx context.Context, prompt string) (string, error)
Analyze(ctx context.Context, prompt string, target any) error
ChatStream(ctx context.Context, prompt string) (<-chan string, error)
HealthCheck(ctx context.Context) HealthStatus
}, cacheSize int) *CachedClient
NewCachedClient creates a new cached client wrapper. Set cacheSize to 0 to disable caching.
func (*CachedClient) CacheStats ¶ added in v0.18.0
func (c *CachedClient) CacheStats() (keys int, hits int, misses int)
CacheStats returns current cache statistics.
func (*CachedClient) ChatStream ¶ added in v0.18.0
ChatStream does not use caching (streams are not cacheable).
func (*CachedClient) ClearCache ¶ added in v0.18.0
func (c *CachedClient) ClearCache()
ClearCache clears all cached entries.
func (*CachedClient) HealthCheck ¶ added in v0.18.0
func (c *CachedClient) HealthCheck(ctx context.Context) HealthStatus
HealthCheck delegates to the underlying client.
type CircuitBreaker ¶ added in v0.18.0
type CircuitBreaker struct {
// contains filtered or unexported fields
}
CircuitBreaker wraps gobreaker with additional HotPlex-specific features.
func NewCircuitBreaker ¶ added in v0.18.0
func NewCircuitBreaker(config CircuitBreakerConfig) *CircuitBreaker
NewCircuitBreaker creates a new circuit breaker.
func (*CircuitBreaker) Execute ¶ added in v0.18.0
func (cb *CircuitBreaker) Execute(ctx context.Context, fn func() error) error
Execute executes a function with circuit breaker protection.
func (*CircuitBreaker) ExecuteWithResult ¶ added in v0.18.0
func (cb *CircuitBreaker) ExecuteWithResult(ctx context.Context, fn func() (interface{}, error)) (interface{}, error)
ExecuteWithResult executes a function that returns a value with circuit breaker protection.
func (*CircuitBreaker) ForceClose ¶ added in v0.18.0
func (cb *CircuitBreaker) ForceClose()
ForceClose manually closes the circuit (override protection).
func (*CircuitBreaker) ForceOpen ¶ added in v0.18.0
func (cb *CircuitBreaker) ForceOpen()
ForceOpen manually opens the circuit (for maintenance/emergency).
func (*CircuitBreaker) GetFailureRate ¶ added in v0.18.0
func (cb *CircuitBreaker) GetFailureRate() float64
GetFailureRate returns the current failure rate (last interval).
func (*CircuitBreaker) GetState ¶ added in v0.18.0
func (cb *CircuitBreaker) GetState() CircuitState
GetState returns the current circuit state.
func (*CircuitBreaker) GetStats ¶ added in v0.18.0
func (cb *CircuitBreaker) GetStats() CircuitBreakerStats
GetStats returns circuit breaker statistics.
func (*CircuitBreaker) IsHealthy ¶ added in v0.18.0
func (cb *CircuitBreaker) IsHealthy() bool
IsHealthy returns true if the circuit is closed or half-open.
func (*CircuitBreaker) Reset ¶ added in v0.18.0
func (cb *CircuitBreaker) Reset()
Reset manually resets the circuit breaker to closed state.
type CircuitBreakerConfig ¶ added in v0.18.0
type CircuitBreakerConfig struct {
// Name is the circuit breaker name (for logging/metrics).
Name string
// MaxFailures is the number of failures before opening circuit.
MaxFailures uint32
// Interval is the time window for counting failures.
Interval time.Duration
// Timeout is how long circuit stays open before half-open.
Timeout time.Duration
// HalfOpenMaxRequests is max requests allowed in half-open state.
HalfOpenMaxRequests uint32
// SuccessThreshold is successes needed in half-open to close.
SuccessThreshold uint32
// Logger for circuit state changes.
Logger *slog.Logger
}
CircuitBreakerConfig holds configuration for circuit breaker.
func DefaultCircuitBreakerConfig ¶ added in v0.18.0
func DefaultCircuitBreakerConfig() CircuitBreakerConfig
DefaultCircuitBreakerConfig returns sensible defaults.
type CircuitBreakerStats ¶ added in v0.18.0
type CircuitBreakerStats struct {
State CircuitState
TotalRequests uint64
SuccessRequests uint64
FailRequests uint64
CircuitOpens uint64
LastStateChange time.Time
IsForced bool
}
CircuitBreakerStats holds circuit breaker statistics.
type CircuitState ¶ added in v0.18.0
type CircuitState string
CircuitState represents the state of a circuit breaker.
const (
// CircuitClosed - Normal operation, requests pass through.
CircuitClosed CircuitState = "closed"
// CircuitOpen - Circuit is tripped, requests fail fast.
CircuitOpen CircuitState = "open"
// CircuitHalfOpen - Testing if service has recovered.
CircuitHalfOpen CircuitState = "half-open"
)
type CostCalculator ¶ added in v0.18.0
type CostCalculator struct {
// contains filtered or unexported fields
}
CostCalculator calculates costs based on model pricing.
func NewCostCalculator ¶ added in v0.18.0
func NewCostCalculator() *CostCalculator
NewCostCalculator creates a new cost calculator.
func (*CostCalculator) AddPricing ¶ added in v0.18.0
func (cc *CostCalculator) AddPricing(pricing ModelPricing)
AddPricing adds or updates pricing for a model.
func (*CostCalculator) CalculateCost ¶ added in v0.18.0
func (cc *CostCalculator) CalculateCost(modelName string, inputTokens, outputTokens int) (float64, error)
CalculateCost calculates the cost for a request.
func (*CostCalculator) CountTokens ¶ added in v0.18.0
func (cc *CostCalculator) CountTokens(text string) int
CountTokens estimates token count from text. This is a simple approximation; for production use, consider using tiktoken.
func (*CostCalculator) CountTokensFromMessages ¶ added in v0.18.0
func (cc *CostCalculator) CountTokensFromMessages(messages []openai.ChatCompletionMessage) int
CountTokensFromMessages counts tokens from OpenAI messages.
func (*CostCalculator) GetAllSessions ¶ added in v0.18.0
func (cc *CostCalculator) GetAllSessions() map[string]*SessionCost
GetAllSessions returns all session costs.
func (*CostCalculator) GetPricing ¶ added in v0.18.0
func (cc *CostCalculator) GetPricing(modelName string) (ModelPricing, bool)
GetPricing returns pricing for a model.
func (*CostCalculator) GetSessionCost ¶ added in v0.18.0
func (cc *CostCalculator) GetSessionCost(sessionID string) (*SessionCost, bool)
GetSessionCost returns cost statistics for a session.
func (*CostCalculator) GetTotalCost ¶ added in v0.18.0
func (cc *CostCalculator) GetTotalCost() float64
GetTotalCost returns total cost across all sessions.
func (*CostCalculator) ResetSession ¶ added in v0.18.0
func (cc *CostCalculator) ResetSession(sessionID string)
ResetSession resets a session's cost tracking.
func (*CostCalculator) SetSessionBudget ¶ added in v0.18.0
func (cc *CostCalculator) SetSessionBudget(sessionID string, budget float64)
SetSessionBudget sets a budget limit for a session.
func (*CostCalculator) TrackRequest ¶ added in v0.18.0
func (cc *CostCalculator) TrackRequest(sessionID, modelName string, inputTokens, outputTokens int) (*SessionCost, float64, error)
TrackRequest tracks a request's cost for a session.
type FailoverConfig ¶ added in v0.18.0
type FailoverConfig struct {
// Providers is the list of configured providers.
Providers []ProviderConfig
// EnableAutoFailover enables automatic failover on errors.
EnableAutoFailover bool
// EnableFailback enables automatic failback to primary.
EnableFailback bool
// FailbackCooldown is the wait time before attempting failback.
FailbackCooldown time.Duration
// HealthCheckInterval is how often to check provider health.
HealthCheckInterval time.Duration
// MaxFailoverAttempts is max consecutive failovers before giving up.
MaxFailoverAttempts int
// Logger for failover events.
Logger *slog.Logger
// HistorySize is the number of failover records to keep.
HistorySize int
}
FailoverConfig holds configuration for multi-provider failover.
func DefaultFailoverConfig ¶ added in v0.18.0
func DefaultFailoverConfig() FailoverConfig
DefaultFailoverConfig returns sensible defaults.
type FailoverHistory ¶ added in v0.18.0
type FailoverHistory struct {
// contains filtered or unexported fields
}
FailoverHistory maintains a circular buffer of failover records.
func NewFailoverHistory ¶ added in v0.18.0
func NewFailoverHistory(size int) *FailoverHistory
NewFailoverHistory creates a new failover history buffer.
func (*FailoverHistory) Add ¶ added in v0.18.0
func (h *FailoverHistory) Add(record FailoverRecord)
Add adds a failover record.
func (*FailoverHistory) GetRecent ¶ added in v0.18.0
func (h *FailoverHistory) GetRecent(count int) []FailoverRecord
GetRecent returns the most recent failover records.
type FailoverManager ¶ added in v0.18.0
type FailoverManager struct {
// contains filtered or unexported fields
}
FailoverManager manages multi-provider failover logic.
func NewFailoverManager ¶ added in v0.18.0
func NewFailoverManager(config FailoverConfig) *FailoverManager
NewFailoverManager creates a new failover manager.
func (*FailoverManager) Close ¶ added in v0.18.0
func (fm *FailoverManager) Close()
Close stops the health check goroutine and cleans up resources. It should be called when the FailoverManager is no longer needed.
func (*FailoverManager) ExecuteWithFailover ¶ added in v0.18.0
func (fm *FailoverManager) ExecuteWithFailover(ctx context.Context, fn func(provider *ProviderConfig) error) error
ExecuteWithFailover executes a function with automatic failover.
func (*FailoverManager) GetCurrentProvider ¶ added in v0.18.0
func (fm *FailoverManager) GetCurrentProvider() *ProviderConfig
GetCurrentProvider returns the current active provider.
func (*FailoverManager) GetStats ¶ added in v0.18.0
func (fm *FailoverManager) GetStats() FailoverStats
GetStats returns failover statistics.
func (*FailoverManager) ManualFailover ¶ added in v0.18.0
func (fm *FailoverManager) ManualFailover(providerName string) error
ManualFailover manually switches to a specific provider.
func (*FailoverManager) Reset ¶ added in v0.18.0
func (fm *FailoverManager) Reset()
Reset resets failover state to initial configuration.
func (*FailoverManager) SetCurrentProvider ¶ added in v0.18.0
func (fm *FailoverManager) SetCurrentProvider(name string)
SetCurrentProvider sets the current provider (for testing).
func (*FailoverManager) SetLastFailoverTime ¶ added in v0.18.0
func (fm *FailoverManager) SetLastFailoverTime(t time.Time)
SetLastFailoverTime sets the last failover time (for testing).
type FailoverRecord ¶ added in v0.18.0
type FailoverRecord struct {
Timestamp time.Time
FromProvider string
ToProvider string
Reason string
Duration time.Duration
Success bool
}
FailoverRecord represents a failover event record.
type FailoverStats ¶ added in v0.18.0
type FailoverStats struct {
IsActive bool
CurrentProvider string
FailoverCount int32
LastFailoverTime time.Time
HealthyProviders []string
UnhealthyProviders []string
RecentFailovers []FailoverRecord
}
FailoverStats holds failover statistics.
type HealthChecker ¶ added in v0.18.0
type HealthChecker interface {
HealthCheck(ctx context.Context) HealthStatus
}
HealthChecker provides health check capability.
type HealthMonitor ¶ added in v0.18.0
type HealthMonitor struct {
// contains filtered or unexported fields
}
HealthMonitor wraps a client with health monitoring capabilities.
func NewHealthMonitor ¶ added in v0.18.0
func NewHealthMonitor(client interface {
Chat(ctx context.Context, prompt string) (string, error)
Analyze(ctx context.Context, prompt string, target any) error
ChatStream(ctx context.Context, prompt string) (<-chan string, error)
HealthCheck(ctx context.Context) HealthStatus
}, checkInterval time.Duration) *HealthMonitor
NewHealthMonitor creates a health monitor wrapper.
func (*HealthMonitor) ChatStream ¶ added in v0.18.0
ChatStream delegates to the underlying client.
func (*HealthMonitor) HealthCheck ¶ added in v0.18.0
func (h *HealthMonitor) HealthCheck(ctx context.Context) HealthStatus
HealthCheck performs a health check with optional caching.
func (*HealthMonitor) IsHealthy ¶ added in v0.18.0
func (h *HealthMonitor) IsHealthy() bool
IsHealthy returns true if the last health check passed.
func (*HealthMonitor) LastHealthCheck ¶ added in v0.18.0
func (h *HealthMonitor) LastHealthCheck() time.Time
LastHealthCheck returns the time of the last health check.
type HealthStatus ¶ added in v0.18.0
HealthStatus represents the health status of an LLM client. Exported for use by the brain package.
type MetricsCollector ¶ added in v0.18.0
type MetricsCollector struct {
// contains filtered or unexported fields
}
MetricsCollector collects and exports LLM metrics via OpenTelemetry.
func NewMetricsCollector ¶ added in v0.18.0
func NewMetricsCollector(config MetricsConfig) *MetricsCollector
NewMetricsCollector creates a new metrics collector.
func (*MetricsCollector) EndRequest ¶ added in v0.18.0
func (mc *MetricsCollector) EndRequest()
EndRequest decrements the active request counter.
func (*MetricsCollector) GetStats ¶ added in v0.18.0
func (mc *MetricsCollector) GetStats() MetricsStats
GetStats returns current metrics statistics.
func (*MetricsCollector) RecordRequest ¶ added in v0.18.0
func (mc *MetricsCollector) RecordRequest(model string, scenario string, inputTokens int64, outputTokens int64, cost float64, latencyMs float64, err error)
RecordRequest records a completed request with metrics.
func (*MetricsCollector) RecordRoutingDecision ¶ added in v0.18.0
func (mc *MetricsCollector) RecordRoutingDecision(scenario Scenario, strategy RouteStrategy, model string)
RecordRoutingDecision records a routing decision.
func (*MetricsCollector) StartRequest ¶ added in v0.18.0
func (mc *MetricsCollector) StartRequest()
StartRequest increments the active request counter.
type MetricsConfig ¶ added in v0.18.0
type MetricsConfig struct {
// Enabled enables metrics collection.
Enabled bool
// ServiceName is the name for the metrics service.
ServiceName string
// MaxLatencySamples is the max number of latency samples to keep locally.
MaxLatencySamples int
}
MetricsConfig holds configuration for metrics collection.
type MetricsStats ¶ added in v0.18.0
type MetricsStats struct {
TotalRequests int64
TotalInputTokens int64
TotalOutputTokens int64
TotalCost float64
TotalErrors int64
ErrorRate float64
AvgLatencyMs float64
ActiveRequests int64
}
MetricsStats represents aggregated metrics statistics.
type ModelConfig ¶ added in v0.18.0
type ModelConfig struct {
// Name is the model identifier (e.g., "gpt-4o-mini", "qwen-plus").
Name string
// Provider is the provider name (e.g., "openai", "dashscope").
Provider string
// APIKey is the API key for this model.
APIKey string
// Endpoint is the optional custom endpoint.
Endpoint string
// CostPer1KInput is the cost per 1K input tokens (USD).
CostPer1KInput float64
// CostPer1KOutput is the cost per 1K output tokens (USD).
CostPer1KOutput float64
// AvgLatencyMs is the average latency in milliseconds.
AvgLatencyMs int64
// MaxTokens is the maximum context window size.
MaxTokens int
// Enabled indicates if this model is available for routing.
Enabled bool
}
ModelConfig represents configuration for a single model.
type ModelPricing ¶ added in v0.18.0
type ModelPricing struct {
ModelName string
Provider string
CostPer1KInput float64 // USD per 1K input tokens
CostPer1KOutput float64 // USD per 1K output tokens
CostPer1KCache float64 // USD per 1K cached input tokens (if applicable)
Currency string
EffectiveDate time.Time
}
ModelPricing represents pricing for a model.
func DefaultModelPricing ¶ added in v0.18.0
func DefaultModelPricing() []ModelPricing
DefaultModelPricing returns default pricing for common models.
type ModelUsage ¶ added in v0.18.0
type ModelUsage struct {
ModelName string
InputTokens int64
OutputTokens int64
Cost float64
RequestCount int64
}
ModelUsage tracks usage for a specific model within a session.
type OpenAIClient ¶
type OpenAIClient struct {
// contains filtered or unexported fields
}
OpenAIClient implements OpenAI-compatible LLM interactions. It can be used for OpenAI, DeepSeek, Groq, etc.
func NewOpenAIClient ¶
func NewOpenAIClient(apiKey, endpoint, model string, logger *slog.Logger) *OpenAIClient
NewOpenAIClient creates a new OpenAI compatible client.
func (*OpenAIClient) Analyze ¶
Analyze requests JSON formatted output from the model. It uses "ResponseFormat: {Type: JSON_OBJECT}" to ensure model compatibility for structured reasoning.
func (*OpenAIClient) ChatStream ¶ added in v0.18.0
ChatStream returns a channel that streams tokens as they are generated. The channel is closed when the stream completes or an error occurs.
func (*OpenAIClient) HealthCheck ¶ added in v0.18.0
func (c *OpenAIClient) HealthCheck(ctx context.Context) HealthStatus
HealthCheck performs a simple health check by making a minimal API call.
type PriorityClient ¶ added in v0.18.0
type PriorityClient struct {
// contains filtered or unexported fields
}
PriorityClient wraps a scheduler with execution capabilities.
func NewPriorityClient ¶ added in v0.18.0
func NewPriorityClient(scheduler *PriorityScheduler, timeout time.Duration, logger *slog.Logger) *PriorityClient
NewPriorityClient creates a new priority client.
func (*PriorityClient) ProcessAll ¶ added in v0.18.0
func (pc *PriorityClient) ProcessAll(ctx context.Context) error
ProcessAll processes all requests in the queue.
func (*PriorityClient) ProcessNext ¶ added in v0.18.0
func (pc *PriorityClient) ProcessNext(ctx context.Context) error
ProcessNext processes the next request in the queue.
func (*PriorityClient) Submit ¶ added in v0.18.0
func (pc *PriorityClient) Submit(ctx context.Context, id string, priority Priority, execute func() error) error
Submit submits a request for execution.
func (*PriorityClient) SubmitWithResult ¶ added in v0.18.0
func (pc *PriorityClient) SubmitWithResult(ctx context.Context, id string, priority Priority, execute func() error) (<-chan error, error)
SubmitWithResult submits a request and returns a channel on which the execution result (an error value) is delivered.
type PriorityConfig ¶ added in v0.18.0
type PriorityConfig struct {
// MaxQueueSize is the maximum queue size.
MaxQueueSize int
// EnableLowPriorityDrop enables dropping low priority requests when queue is full.
EnableLowPriorityDrop bool
// HighPriorityReserve reserves queue slots for high priority.
HighPriorityReserve int
// MaxWaitTime is the maximum wait time for requests.
MaxWaitTime time.Duration
// Logger for queue events.
Logger *slog.Logger
}
PriorityConfig holds configuration for priority queue.
func DefaultPriorityConfig ¶ added in v0.18.0
func DefaultPriorityConfig() PriorityConfig
DefaultPriorityConfig returns sensible defaults.
type PriorityQueue ¶ added in v0.18.0
type PriorityQueue struct {
// contains filtered or unexported fields
}
PriorityQueue implements a priority queue for requests.
func (*PriorityQueue) Len ¶ added in v0.18.0
func (pq *PriorityQueue) Len() int
Len returns the number of items in the queue.
func (*PriorityQueue) Less ¶ added in v0.18.0
func (pq *PriorityQueue) Less(i, j int) bool
Less compares two items (lower priority value = higher priority).
func (*PriorityQueue) Peek ¶ added in v0.18.0
func (pq *PriorityQueue) Peek() *PriorityRequest
Peek returns the highest priority item without removing it.
func (*PriorityQueue) Pop ¶ added in v0.18.0
func (pq *PriorityQueue) Pop() interface{}
Pop removes and returns the highest priority item.
func (*PriorityQueue) Push ¶ added in v0.18.0
func (pq *PriorityQueue) Push(x interface{})
Push adds an item to the queue.
func (*PriorityQueue) Swap ¶ added in v0.18.0
func (pq *PriorityQueue) Swap(i, j int)
Swap swaps two items.
type PriorityRequest ¶ added in v0.18.0
type PriorityRequest struct {
ID string
Priority Priority
Context context.Context
Execute func() error
CreatedAt time.Time
Deadline time.Time
MaxWait time.Duration
// contains filtered or unexported fields
}
PriorityRequest represents a request in the priority queue.
type PriorityScheduler ¶ added in v0.18.0
type PriorityScheduler struct {
// contains filtered or unexported fields
}
PriorityScheduler manages priority-based request scheduling.
func NewPriorityScheduler ¶ added in v0.18.0
func NewPriorityScheduler(config PriorityConfig) *PriorityScheduler
NewPriorityScheduler creates a new priority scheduler.
func (*PriorityScheduler) Clear ¶ added in v0.18.0
func (ps *PriorityScheduler) Clear()
Clear clears all items from the queue.
func (*PriorityScheduler) Dequeue ¶ added in v0.18.0
func (ps *PriorityScheduler) Dequeue(ctx context.Context) (*PriorityRequest, error)
Dequeue removes and returns the highest priority request.
func (*PriorityScheduler) Enqueue ¶ added in v0.18.0
func (ps *PriorityScheduler) Enqueue(ctx context.Context, id string, priority Priority, execute func() error, maxWait time.Duration) error
Enqueue adds a request to the priority queue.
func (*PriorityScheduler) GetStats ¶ added in v0.18.0
func (ps *PriorityScheduler) GetStats() PriorityStats
GetStats returns scheduler statistics.
func (*PriorityScheduler) Shutdown ¶ added in v0.18.0
func (ps *PriorityScheduler) Shutdown()
Shutdown gracefully shuts down the scheduler.
func (*PriorityScheduler) Size ¶ added in v0.18.0
func (ps *PriorityScheduler) Size() int
Size returns the current queue size.
func (*PriorityScheduler) TryDequeue ¶ added in v0.18.0
func (ps *PriorityScheduler) TryDequeue() *PriorityRequest
TryDequeue tries to dequeue without blocking.
type PriorityStats ¶ added in v0.18.0
type PriorityStats struct {
QueueSize int32
MaxQueueSize int
Dropped int64
Processed int64
HighProcessed int64
MediumProcessed int64
LowProcessed int64
HighDropped int64
MediumDropped int64
LowDropped int64
IsShutdown bool
}
PriorityStats holds scheduler statistics.
type ProviderConfig ¶ added in v0.18.0
type ProviderConfig struct {
// Name is the provider identifier (e.g., "openai", "dashscope", "anthropic").
Name string
// APIKey is the API key for this provider.
APIKey string
// Endpoint is the optional custom endpoint.
Endpoint string
// Models is the list of models available from this provider.
Models []string
// Priority is the failover priority (1 = primary, higher = backup).
Priority int
// Enabled indicates if this provider is available.
Enabled bool
// Timeout is the request timeout for this provider.
Timeout time.Duration
// MaxRetries is the maximum number of retry attempts for this provider.
MaxRetries int
}
ProviderConfig represents configuration for an LLM provider.
type RateLimitConfig ¶ added in v0.18.0
type RateLimitConfig struct {
// RequestsPerSecond is the sustained request rate.
RequestsPerSecond float64
// BurstSize is the maximum burst size.
BurstSize int
// MaxQueueSize is the maximum number of waiting requests.
MaxQueueSize int
// QueueTimeout is the maximum time a request can wait in the queue.
QueueTimeout time.Duration
// PerModel enables per-model rate limiting.
PerModel bool
}
RateLimitConfig holds configuration for rate limiting.
type RateLimitStats ¶ added in v0.18.0
type RateLimitStats struct {
TotalRequests int64
QueuedRequests int64
RejectedRequests int64
AvgWaitTimeMs float64
LastReset time.Time
}
RateLimitStats holds rate limiting statistics.
type RateLimitedClient ¶ added in v0.18.0
type RateLimitedClient struct {
// contains filtered or unexported fields
}
RateLimitedClient wraps a client with rate limiting.
func NewRateLimitedClient ¶ added in v0.18.0
func NewRateLimitedClient(
client interface {
Chat(ctx context.Context, prompt string) (string, error)
Analyze(ctx context.Context, prompt string, target any) error
ChatStream(ctx context.Context, prompt string) (<-chan string, error)
HealthCheck(ctx context.Context) HealthStatus
},
limiter *RateLimiter,
) *RateLimitedClient
NewRateLimitedClient creates a new rate-limited client wrapper.
func (*RateLimitedClient) ChatStream ¶ added in v0.18.0
ChatStream implements rate-limited streaming.
func (*RateLimitedClient) HealthCheck ¶ added in v0.18.0
func (c *RateLimitedClient) HealthCheck(ctx context.Context) HealthStatus
HealthCheck implements rate-limited health check.
func (*RateLimitedClient) SetModel ¶ added in v0.18.0
func (c *RateLimitedClient) SetModel(model string)
SetModel sets the model for per-model rate limiting.
type RateLimiter ¶ added in v0.18.0
type RateLimiter struct {
// contains filtered or unexported fields
}
RateLimiter provides rate limiting using a token bucket algorithm.
func NewRateLimiter ¶ added in v0.18.0
func NewRateLimiter(config RateLimitConfig) *RateLimiter
NewRateLimiter creates a new rate limiter.
func (*RateLimiter) Allow ¶ added in v0.18.0
func (rl *RateLimiter) Allow() bool
Allow checks if a request can proceed immediately.
func (*RateLimiter) Close ¶ added in v0.18.0
func (rl *RateLimiter) Close()
Close closes the rate limiter and stops queue processing.
func (*RateLimiter) GetStats ¶ added in v0.18.0
func (rl *RateLimiter) GetStats() RateLimitStats
GetStats returns current rate limiting statistics.
func (*RateLimiter) Remaining ¶ added in v0.18.0
func (rl *RateLimiter) Remaining() int
Remaining returns the number of requests remaining in the current burst.
func (*RateLimiter) Reset ¶ added in v0.18.0
func (rl *RateLimiter) Reset()
Reset resets the rate limiter immediately.
func (*RateLimiter) ResetStats ¶ added in v0.18.0
func (rl *RateLimiter) ResetStats()
ResetStats resets the statistics.
func (*RateLimiter) SetModelRate ¶ added in v0.18.0
func (rl *RateLimiter) SetModelRate(model string, requestsPerSecond float64, burstSize int)
SetModelRate sets rate limit for a specific model.
func (*RateLimiter) SetRate ¶ added in v0.18.0
func (rl *RateLimiter) SetRate(requestsPerSecond float64, burstSize int)
SetRate updates the rate limit dynamically.
func (*RateLimiter) TryWithRateLimit ¶ added in v0.18.0
TryWithRateLimit attempts to execute with rate limiting; it returns immediately if rate limited.
func (*RateLimiter) Wait ¶ added in v0.18.0
func (rl *RateLimiter) Wait(ctx context.Context) error
Wait waits until a request can proceed or context is cancelled.
func (*RateLimiter) WaitModel ¶ added in v0.18.0
func (rl *RateLimiter) WaitModel(ctx context.Context, model string) error
WaitModel waits for a specific model's rate limit.
func (*RateLimiter) WithRateLimit ¶ added in v0.18.0
WithRateLimit wraps a function with rate limiting.
type RequestTimer ¶ added in v0.18.0
type RequestTimer struct {
// contains filtered or unexported fields
}
RequestTimer helps track request timing.
func NewRequestTimer ¶ added in v0.18.0
func NewRequestTimer(metrics *MetricsCollector, model, scenario string) *RequestTimer
NewRequestTimer creates a new request timer.
type RetryClient ¶ added in v0.18.0
type RetryClient struct {
// contains filtered or unexported fields
}
RetryClient wraps an LLM client with retry and backoff logic.
func NewRetryClient ¶ added in v0.18.0
func NewRetryClient(client interface {
Chat(ctx context.Context, prompt string) (string, error)
Analyze(ctx context.Context, prompt string, target any) error
ChatStream(ctx context.Context, prompt string) (<-chan string, error)
HealthCheck(ctx context.Context) HealthStatus
}, maxRetries, retryMinWaitMs, retryMaxWaitMs int) *RetryClient
NewRetryClient creates a new retry client wrapper. Set maxRetries to 0 to disable retries.
func (*RetryClient) Analyze ¶ added in v0.18.0
Analyze implements the Analyze method with retry logic.
func (*RetryClient) ChatStream ¶ added in v0.18.0
ChatStream implements the ChatStream method with retry logic.
func (*RetryClient) HealthCheck ¶ added in v0.18.0
func (r *RetryClient) HealthCheck(ctx context.Context) HealthStatus
HealthCheck delegates to the underlying client.
type RouteStrategy ¶ added in v0.18.0
type RouteStrategy string
RouteStrategy defines the routing strategy for model selection.
const (
// StrategyCostPriority routes to the cheapest model that meets requirements.
StrategyCostPriority RouteStrategy = "cost_priority"
// StrategyLatencyPriority routes to the fastest model.
StrategyLatencyPriority RouteStrategy = "latency_priority"
// StrategyQualityPriority routes to the highest quality model.
StrategyQualityPriority RouteStrategy = "quality_priority"
// StrategyBalanced routes with balanced cost/quality tradeoff.
StrategyBalanced RouteStrategy = "balanced"
)
type Router ¶ added in v0.18.0
type Router struct {
// contains filtered or unexported fields
}
Router provides dynamic model routing based on scenario and strategy.
func NewRouter ¶ added in v0.18.0
func NewRouter(config RouterConfig, metrics *MetricsCollector) *Router
NewRouter creates a new model router.
func (*Router) AddModel ¶ added in v0.18.0
func (r *Router) AddModel(model ModelConfig)
AddModel dynamically adds a model to the router.
func (*Router) DetectScenario ¶ added in v0.18.0
DetectScenario infers the scenario from the prompt content.
func (*Router) GetDefaultStrategy ¶ added in v0.18.0
func (r *Router) GetDefaultStrategy() RouteStrategy
GetDefaultStrategy returns the default routing strategy.
func (*Router) GetModels ¶ added in v0.18.0
func (r *Router) GetModels() []ModelConfig
GetModels returns all configured models.
func (*Router) RemoveModel ¶ added in v0.18.0
RemoveModel removes a model by name.
func (*Router) SelectModel ¶ added in v0.18.0
func (r *Router) SelectModel(ctx context.Context, scenario Scenario, strategy RouteStrategy) (*ModelConfig, error)
SelectModel selects the best model for the given scenario and strategy.
type RouterConfig ¶ added in v0.18.0
type RouterConfig struct {
// DefaultStrategy is the default routing strategy.
DefaultStrategy RouteStrategy
// Models is the list of available models.
Models []ModelConfig
// ScenarioModelMap maps scenarios to preferred model names.
ScenarioModelMap map[Scenario]string
// FallbackModel is used when the primary model fails.
FallbackModel string
// Logger for routing decisions.
Logger *slog.Logger
}
RouterConfig holds the configuration for the model router.
type Scenario ¶ added in v0.18.0
type Scenario string
Scenario defines the use case scenario for routing.
const (
// ScenarioChat is for conversational chat.
ScenarioChat Scenario = "chat"
// ScenarioAnalyze is for structured analysis/extraction.
ScenarioAnalyze Scenario = "analyze"
// ScenarioCode is for code generation/review.
ScenarioCode Scenario = "code"
// ScenarioReasoning is for complex reasoning tasks.
ScenarioReasoning Scenario = "reasoning"
)
type SessionCost ¶ added in v0.18.0
type SessionCost struct {
SessionID string
TotalInput int64
TotalOutput int64
TotalCost float64
RequestCount int64
FirstRequest time.Time
LastRequest time.Time
ModelBreakdown map[string]*ModelUsage
BudgetLimit float64 // Optional budget limit
BudgetAlerted bool // Whether budget alert has been triggered
}
SessionCost tracks costs for a single session.