llm

package
v0.24.1 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 9, 2026 License: MIT Imports: 21 Imported by: 0

Documentation

Index

Constants

View Source
const (
	DefaultCacheSize         = 1000
	DefaultMaxRetries        = 3
	DefaultRetryMinWaitMs    = 100
	DefaultRetryMaxWaitMs    = 5000
	DefaultRPS               = 10.0
	DefaultMaxQueueSize      = 100
	DefaultQueueTimeout      = 30 * time.Second
	DefaultMaxLatencySamples = 1000
)

Default configuration values.

Variables

View Source
var (
	ErrAPIKeyRequired  = errors.New("API key is required")
	ErrModelRequired   = errors.New("model is required")
	ErrInvalidEndpoint = errors.New("invalid endpoint URL")
)

Static errors for validation.

Functions

func DefaultCostEstimator added in v0.22.0

func DefaultCostEstimator(prompt string, model string) float64

DefaultCostEstimator provides a simple cost estimation. Uses approximate pricing: $0.001 per 1K prompt tokens.

func PrioritySchedulerWithClient added in v0.22.0

func PrioritySchedulerWithClient(timeout time.Duration, logger *slog.Logger) (*PriorityScheduler, *PriorityClient)

PrioritySchedulerWithClient creates a priority scheduler and client for request prioritization. Returns both the scheduler (for workers) and client (for submitting requests).

Usage:

scheduler, client := llm.PrioritySchedulerWithClient(5 * time.Minute, nil)
// Submit high priority request
_ = client.Submit(ctx, "req-1", llm.PriorityHigh, func() error {
    _, err := llmClient.Chat(ctx, prompt)
    return err
})
// Worker processes requests
_ = client.ProcessNext(ctx)

Types

type AtomicRateLimitStats added in v0.18.0

type AtomicRateLimitStats struct {
	TotalRequests    atomic.Int64
	QueuedRequests   atomic.Int64
	RejectedRequests atomic.Int64
	AvgWaitTimeMs    atomic.Float64
}

AtomicRateLimitStats holds atomic stats for concurrent access.

type BudgetAlert added in v0.18.0

type BudgetAlert struct {
	Timestamp   time.Time
	Period      BudgetPeriod
	CurrentCost float64
	Limit       float64
	Percentage  float64
	Message     string
	IsHardLimit bool
}

BudgetAlert represents a budget alert event.

type BudgetAlertThreshold added in v0.18.0

type BudgetAlertThreshold struct {
	// Percentage is the budget percentage (e.g., 80 for 80%).
	Percentage float64
	// Message is the alert message template.
	Message string
}

BudgetAlertThreshold defines alert thresholds.

type BudgetClient added in v0.22.0

type BudgetClient struct {
	// contains filtered or unexported fields
}

BudgetClient wraps an LLM client with budget tracking. It checks budget before each request and tracks costs after.

func BudgetClientWithTracker added in v0.22.0

func BudgetClientWithTracker(client LLMClient, tracker *BudgetTracker, model string, estimator CostEstimator) *BudgetClient

BudgetClientWithTracker creates a client with budget tracking. The BudgetTracker must be created separately and managed by the caller.

Usage:

tracker := llm.NewBudgetTracker(llm.BudgetConfig{
    Period:          llm.BudgetDaily,
    Limit:           10.0, // $10 daily
    EnableHardLimit: true,
}, "session-123")
client, _ := llm.BudgetClientWithTracker(baseClient, tracker, "gpt-4", nil)

func NewBudgetClient added in v0.22.0

func NewBudgetClient(client LLMClient, tracker *BudgetTracker, model string, estimator CostEstimator) *BudgetClient

NewBudgetClient creates a new budget-aware client wrapper.

func NewBudgetManagedClient added in v0.22.0

func NewBudgetManagedClient(apiKey, model string, dailyLimit float64) (*BudgetClient, error)

NewBudgetManagedClient creates a client with budget tracking using default configuration. This is a convenience function for simple budget control.

Usage:

client, _ := llm.NewBudgetManagedClient(apiKey, "gpt-4", 10.0) // $10 budget

func (*BudgetClient) Analyze added in v0.22.0

func (b *BudgetClient) Analyze(ctx context.Context, prompt string, target any) error

Analyze implements the Analyze method with budget tracking.

func (*BudgetClient) Chat added in v0.22.0

func (b *BudgetClient) Chat(ctx context.Context, prompt string) (string, error)

Chat implements the Chat method with budget tracking.

func (*BudgetClient) ChatStream added in v0.22.0

func (b *BudgetClient) ChatStream(ctx context.Context, prompt string) (<-chan string, error)

ChatStream implements the ChatStream method with budget tracking.

func (*BudgetClient) GetTracker added in v0.22.0

func (b *BudgetClient) GetTracker() *BudgetTracker

GetTracker returns the underlying budget tracker.

func (*BudgetClient) HealthCheck added in v0.22.0

func (b *BudgetClient) HealthCheck(ctx context.Context) HealthStatus

HealthCheck delegates to the underlying client.

type BudgetConfig added in v0.18.0

type BudgetConfig struct {
	// Period is the budget period (daily/weekly/monthly/session).
	Period BudgetPeriod
	// Limit is the budget limit in USD.
	Limit float64
	// AlertThresholds is the list of alert thresholds.
	AlertThresholds []BudgetAlertThreshold
	// EnableHardLimit enables hard limit (reject requests over budget).
	EnableHardLimit bool
	// EnableSoftLimit enables soft limit (warn but allow).
	EnableSoftLimit bool
	// Logger for budget events.
	Logger *slog.Logger
}

BudgetConfig holds configuration for budget control.

func DefaultBudgetConfig added in v0.18.0

func DefaultBudgetConfig() BudgetConfig

DefaultBudgetConfig returns sensible defaults.

type BudgetManager added in v0.18.0

type BudgetManager struct {
	// contains filtered or unexported fields
}

BudgetManager manages multiple budget trackers (e.g., per user/session).

func NewBudgetManager added in v0.18.0

func NewBudgetManager(config BudgetConfig) *BudgetManager

NewBudgetManager creates a new budget manager.

func (*BudgetManager) GetAllTrackers added in v0.18.0

func (bm *BudgetManager) GetAllTrackers() map[string]*BudgetTracker

GetAllTrackers returns all budget trackers.

func (*BudgetManager) GetGlobalStats added in v0.18.0

func (bm *BudgetManager) GetGlobalStats() BudgetStats

GetGlobalStats returns global budget statistics.

func (*BudgetManager) GetTracker added in v0.18.0

func (bm *BudgetManager) GetTracker(sessionID string) *BudgetTracker

GetTracker gets or creates a budget tracker for a session.

func (*BudgetManager) RemoveTracker added in v0.18.0

func (bm *BudgetManager) RemoveTracker(sessionID string)

RemoveTracker removes a budget tracker.

func (*BudgetManager) ResetAll added in v0.18.0

func (bm *BudgetManager) ResetAll()

ResetAll resets all budget trackers.

func (*BudgetManager) TrackRequest added in v0.18.0

func (bm *BudgetManager) TrackRequest(sessionID string, cost float64) error

TrackRequest tracks a request across all trackers.

type BudgetPeriod added in v0.18.0

type BudgetPeriod string

BudgetPeriod defines the budget period type.

const (
	// BudgetDaily - Daily budget reset.
	BudgetDaily BudgetPeriod = "daily"
	// BudgetWeekly - Weekly budget reset (Monday).
	BudgetWeekly BudgetPeriod = "weekly"
	// BudgetMonthly - Monthly budget reset (1st).
	BudgetMonthly BudgetPeriod = "monthly"
	// BudgetSession - Session-based budget (no auto-reset).
	BudgetSession BudgetPeriod = "session"
)

type BudgetStats added in v0.18.0

type BudgetStats struct {
	CurrentCost     float64
	Limit           float64
	Remaining       float64
	PercentageUsed  float64
	RequestCount    int64
	PeriodStart     time.Time
	PeriodEnd       time.Time
	IsExceeded      bool
	AlertsTriggered []float64
}

BudgetStats holds budget statistics.

type BudgetTracker added in v0.18.0

type BudgetTracker struct {
	// contains filtered or unexported fields
}

BudgetTracker tracks budget usage and enforces limits.

func NewBudgetTracker added in v0.18.0

func NewBudgetTracker(config BudgetConfig, sessionID string) *BudgetTracker

NewBudgetTracker creates a new budget tracker.

func (*BudgetTracker) CheckAlerts added in v0.18.0

func (bt *BudgetTracker) CheckAlerts()

CheckAlerts checks and triggers budget alerts (public for testing).

func (*BudgetTracker) CheckBudget added in v0.18.0

func (bt *BudgetTracker) CheckBudget(estimatedCost float64) (bool, float64, error)

CheckBudget checks if a request is within budget. Returns (allowed, cost, error).

func (*BudgetTracker) GetRemaining added in v0.18.0

func (bt *BudgetTracker) GetRemaining() float64

GetRemaining returns the remaining budget.

func (*BudgetTracker) GetStats added in v0.18.0

func (bt *BudgetTracker) GetStats() BudgetStats

GetStats returns budget statistics.

func (*BudgetTracker) IsExceeded added in v0.18.0

func (bt *BudgetTracker) IsExceeded() bool

IsExceeded returns true if budget is exceeded.

func (*BudgetTracker) Reset added in v0.18.0

func (bt *BudgetTracker) Reset()

Reset manually resets the budget tracker.

func (*BudgetTracker) SetAlertCallback added in v0.18.0

func (bt *BudgetTracker) SetAlertCallback(callback func(alert BudgetAlert))

SetAlertCallback sets a callback function for budget alerts.

func (*BudgetTracker) SetLimit added in v0.18.0

func (bt *BudgetTracker) SetLimit(limit float64)

SetLimit updates the budget limit.

func (*BudgetTracker) TrackRequest added in v0.18.0

func (bt *BudgetTracker) TrackRequest(cost float64) error

TrackRequest tracks a request's cost against the budget.

type CacheEntry added in v0.18.0

type CacheEntry struct {
	Response string
	JSONData []byte
}

CacheEntry represents a cached response.

type CacheKey added in v0.18.0

type CacheKey struct {
	Prompt    string
	Model     string
	IsAnalyze bool
}

CacheKey represents a unique cache key for LLM requests.

type CacheStats added in v0.22.0

type CacheStats struct {
	Size      int
	Hits      int64
	Misses    int64
	HitRate   float64
	Evictions int64
}

CacheStats represents cache statistics.

type CachedClient added in v0.18.0

type CachedClient struct {
	// contains filtered or unexported fields
}

CachedClient wraps an LLM client with LRU caching.

func NewCachedClient added in v0.18.0

func NewCachedClient(client LLMClient, cacheSize int) *CachedClient

NewCachedClient creates a new cached client wrapper. Set cacheSize to 0 to disable caching.

func (*CachedClient) Analyze added in v0.18.0

func (c *CachedClient) Analyze(ctx context.Context, prompt string, target any) error

Analyze implements the Analyze method with caching.

func (*CachedClient) CacheStats added in v0.18.0

func (c *CachedClient) CacheStats() (keys int, hits int, misses int)

CacheStats returns current cache statistics.

func (*CachedClient) Chat added in v0.18.0

func (c *CachedClient) Chat(ctx context.Context, prompt string) (string, error)

Chat implements the Chat method with caching.

func (*CachedClient) ChatStream added in v0.18.0

func (c *CachedClient) ChatStream(ctx context.Context, prompt string) (<-chan string, error)

ChatStream does not use caching (streams are not cacheable).

func (*CachedClient) ClearCache added in v0.18.0

func (c *CachedClient) ClearCache()

ClearCache clears all cached entries.

func (*CachedClient) HealthCheck added in v0.18.0

func (c *CachedClient) HealthCheck(ctx context.Context) HealthStatus

HealthCheck delegates to the underlying client.

func (*CachedClient) UnderlyingClient added in v0.22.0

func (c *CachedClient) UnderlyingClient() LLMClient

UnderlyingClient returns the underlying client. Used for component extraction in observable chains.

type CircuitBreaker added in v0.18.0

type CircuitBreaker struct {
	// contains filtered or unexported fields
}

CircuitBreaker wraps gobreaker with additional HotPlex-specific features.

func NewCircuitBreaker added in v0.18.0

func NewCircuitBreaker(config CircuitBreakerConfig) *CircuitBreaker

NewCircuitBreaker creates a new circuit breaker.

func (*CircuitBreaker) Execute added in v0.18.0

func (cb *CircuitBreaker) Execute(ctx context.Context, fn func() error) error

Execute executes a function with circuit breaker protection.

func (*CircuitBreaker) ExecuteWithResult added in v0.18.0

func (cb *CircuitBreaker) ExecuteWithResult(ctx context.Context, fn func() (interface{}, error)) (interface{}, error)

ExecuteWithResult executes a function that returns a value with circuit breaker protection.

func (*CircuitBreaker) ForceClose added in v0.18.0

func (cb *CircuitBreaker) ForceClose()

ForceClose manually closes the circuit (override protection).

func (*CircuitBreaker) ForceOpen added in v0.18.0

func (cb *CircuitBreaker) ForceOpen()

ForceOpen manually opens the circuit (for maintenance/emergency).

func (*CircuitBreaker) GetFailureRate added in v0.18.0

func (cb *CircuitBreaker) GetFailureRate() float64

GetFailureRate returns the current failure rate (last interval).

func (*CircuitBreaker) GetState added in v0.18.0

func (cb *CircuitBreaker) GetState() CircuitState

GetState returns the current circuit state.

func (*CircuitBreaker) GetStats added in v0.18.0

func (cb *CircuitBreaker) GetStats() CircuitBreakerStats

GetStats returns circuit breaker statistics.

func (*CircuitBreaker) IsHealthy added in v0.18.0

func (cb *CircuitBreaker) IsHealthy() bool

IsHealthy returns true if circuit is closed or half-open.

func (*CircuitBreaker) Reset added in v0.18.0

func (cb *CircuitBreaker) Reset()

Reset manually resets the circuit breaker to closed state.

type CircuitBreakerConfig added in v0.18.0

type CircuitBreakerConfig struct {
	// Name is the circuit breaker name (for logging/metrics).
	Name string
	// MaxFailures is the number of failures before opening circuit.
	MaxFailures uint32
	// Interval is the time window for counting failures.
	Interval time.Duration
	// Timeout is how long circuit stays open before half-open.
	Timeout time.Duration
	// HalfOpenMaxRequests is max requests allowed in half-open state.
	HalfOpenMaxRequests uint32
	// SuccessThreshold is successes needed in half-open to close.
	SuccessThreshold uint32
	// Logger for circuit state changes.
	Logger *slog.Logger
}

CircuitBreakerConfig holds configuration for circuit breaker.

func DefaultCircuitBreakerConfig added in v0.18.0

func DefaultCircuitBreakerConfig() CircuitBreakerConfig

DefaultCircuitBreakerConfig returns sensible defaults.

type CircuitBreakerStats added in v0.18.0

type CircuitBreakerStats struct {
	State           CircuitState
	TotalRequests   uint64
	SuccessRequests uint64
	FailRequests    uint64
	CircuitOpens    uint64
	LastStateChange time.Time
	IsForced        bool
}

CircuitBreakerStats holds circuit breaker statistics.

type CircuitClient added in v0.22.0

type CircuitClient struct {
	// contains filtered or unexported fields
}

CircuitClient wraps an LLM client with circuit breaker protection.

func NewCircuitClient added in v0.22.0

func NewCircuitClient(client LLMClient, breaker *CircuitBreaker) *CircuitClient

NewCircuitClient creates a new circuit breaker protected client wrapper.

func (*CircuitClient) Analyze added in v0.22.0

func (c *CircuitClient) Analyze(ctx context.Context, prompt string, target any) error

Analyze implements the Analyze method with circuit breaker protection.

func (*CircuitClient) Chat added in v0.22.0

func (c *CircuitClient) Chat(ctx context.Context, prompt string) (string, error)

Chat implements the Chat method with circuit breaker protection.

func (*CircuitClient) ChatStream added in v0.22.0

func (c *CircuitClient) ChatStream(ctx context.Context, prompt string) (<-chan string, error)

ChatStream implements the ChatStream method with circuit breaker protection.

func (*CircuitClient) Client added in v0.22.0

func (c *CircuitClient) Client() LLMClient

Client returns the underlying client for component extraction.

func (*CircuitClient) GetCircuitBreaker added in v0.22.0

func (c *CircuitClient) GetCircuitBreaker() *CircuitBreaker

GetCircuitBreaker returns the underlying circuit breaker for monitoring.

func (*CircuitClient) HealthCheck added in v0.22.0

func (c *CircuitClient) HealthCheck(ctx context.Context) HealthStatus

HealthCheck implements the HealthCheck method.

type CircuitState added in v0.18.0

type CircuitState string

CircuitState represents the state of a circuit breaker.

const (
	// CircuitClosed - Normal operation, requests pass through.
	CircuitClosed CircuitState = "closed"
	// CircuitOpen - Circuit is tripped, requests fail fast.
	CircuitOpen CircuitState = "open"
	// CircuitHalfOpen - Testing if service has recovered.
	CircuitHalfOpen CircuitState = "half-open"
)

type ClientBuilder added in v0.22.0

type ClientBuilder struct {
	// contains filtered or unexported fields
}

ClientBuilder provides a fluent API for constructing LLM clients with various middleware layers (rate limiting, caching, circuit breaker, etc.).

func NewClientBuilder added in v0.22.0

func NewClientBuilder() *ClientBuilder

NewClientBuilder creates a new client builder with default logger.

func (*ClientBuilder) Build added in v0.22.0

func (b *ClientBuilder) Build() (LLMClient, error)

Build constructs the LLM client with all configured middleware layers. The wrapping order (from innermost to outermost):

  1. Base client (OpenAI)
  2. Cache (innermost - cache raw responses before retries)
  3. Retry (retry on transient failures)
  4. Rate Limiter (control request rate)
  5. Circuit Breaker (fail fast on repeated errors)
  6. Metrics (outermost - observe all operations)

func (*ClientBuilder) WithAPIKey added in v0.22.0

func (b *ClientBuilder) WithAPIKey(key string) *ClientBuilder

WithAPIKey sets the API key for the LLM provider.

func (*ClientBuilder) WithCache added in v0.22.0

func (b *ClientBuilder) WithCache(size ...int) *ClientBuilder

WithCache enables response caching with optional custom cache size.

func (*ClientBuilder) WithCircuitBreaker added in v0.22.0

func (b *ClientBuilder) WithCircuitBreaker(config ...CircuitBreakerConfig) *ClientBuilder

WithCircuitBreaker enables circuit breaker protection with optional custom configuration.

func (*ClientBuilder) WithEndpoint added in v0.22.0

func (b *ClientBuilder) WithEndpoint(endpoint string) *ClientBuilder

WithEndpoint sets the API endpoint (optional, for non-OpenAI providers).

func (*ClientBuilder) WithLogger added in v0.22.0

func (b *ClientBuilder) WithLogger(logger *slog.Logger) *ClientBuilder

WithLogger sets a custom logger.

func (*ClientBuilder) WithMetrics added in v0.22.0

func (b *ClientBuilder) WithMetrics(config ...MetricsConfig) *ClientBuilder

WithMetrics enables metrics collection with optional custom configuration.

func (*ClientBuilder) WithModel added in v0.22.0

func (b *ClientBuilder) WithModel(model string) *ClientBuilder

WithModel sets the model to use.

func (*ClientBuilder) WithRateLimit added in v0.22.0

func (b *ClientBuilder) WithRateLimit(rps float64) *ClientBuilder

WithRateLimit enables rate limiting with specified requests per second.

func (*ClientBuilder) WithRateLimitConfig added in v0.22.0

func (b *ClientBuilder) WithRateLimitConfig(rps float64, burst int) *ClientBuilder

WithRateLimitConfig enables rate limiting with custom burst size.

func (*ClientBuilder) WithRetry added in v0.22.0

func (b *ClientBuilder) WithRetry(maxRetries int) *ClientBuilder

WithRetry enables retry logic with specified max retries.

func (*ClientBuilder) WithRetryConfig added in v0.22.0

func (b *ClientBuilder) WithRetryConfig(maxRetries, minWaitMs, maxWaitMs int) *ClientBuilder

WithRetryConfig enables retry logic with custom wait times.

type ClientHealth added in v0.22.0

type ClientHealth struct {
	Status         HealthStatus
	CircuitState   CircuitState
	CacheHitRate   float64
	RateLimitUsage float64
	LastError      string
	LastErrorTime  time.Time
	TotalRequests  int64
	SuccessRate    float64
	AvgLatencyMs   float64
}

ClientHealth represents the overall health of an LLM client.

type CostCalculator added in v0.18.0

type CostCalculator struct {
	// contains filtered or unexported fields
}

CostCalculator calculates costs based on model pricing.

func NewCostCalculator added in v0.18.0

func NewCostCalculator() *CostCalculator

NewCostCalculator creates a new cost calculator.

func (*CostCalculator) AddPricing added in v0.18.0

func (cc *CostCalculator) AddPricing(pricing ModelPricing)

AddPricing adds or updates pricing for a model.

func (*CostCalculator) CalculateCost added in v0.18.0

func (cc *CostCalculator) CalculateCost(modelName string, inputTokens, outputTokens int) (float64, error)

CalculateCost calculates the cost for a request.

func (*CostCalculator) CountTokens added in v0.18.0

func (cc *CostCalculator) CountTokens(text string) int

CountTokens estimates token count from text. This is a simple approximation; for production use, consider using tiktoken.

func (*CostCalculator) CountTokensFromMessages added in v0.18.0

func (cc *CostCalculator) CountTokensFromMessages(messages []openai.ChatCompletionMessage) int

CountTokensFromMessages counts tokens from OpenAI messages.

func (*CostCalculator) GetAllSessions added in v0.18.0

func (cc *CostCalculator) GetAllSessions() map[string]*SessionCost

GetAllSessions returns all session costs.

func (*CostCalculator) GetPricing added in v0.18.0

func (cc *CostCalculator) GetPricing(modelName string) (ModelPricing, bool)

GetPricing returns pricing for a model.

func (*CostCalculator) GetSessionCost added in v0.18.0

func (cc *CostCalculator) GetSessionCost(sessionID string) (*SessionCost, bool)

GetSessionCost returns cost statistics for a session.

func (*CostCalculator) GetTotalCost added in v0.18.0

func (cc *CostCalculator) GetTotalCost() float64

GetTotalCost returns total cost across all sessions.

func (*CostCalculator) ResetSession added in v0.18.0

func (cc *CostCalculator) ResetSession(sessionID string)

ResetSession resets a session's cost tracking.

func (*CostCalculator) SetSessionBudget added in v0.18.0

func (cc *CostCalculator) SetSessionBudget(sessionID string, budget float64)

SetSessionBudget sets a budget limit for a session.

func (*CostCalculator) TrackRequest added in v0.18.0

func (cc *CostCalculator) TrackRequest(sessionID, modelName string, inputTokens, outputTokens int) (*SessionCost, float64, error)

TrackRequest tracks a request's cost for a session.

type CostEstimator added in v0.22.0

type CostEstimator func(prompt string, model string) float64

CostEstimator estimates the cost of a request.

type FailoverConfig added in v0.18.0

type FailoverConfig struct {
	// Providers is the list of configured providers.
	Providers []ProviderConfig
	// EnableAutoFailover enables automatic failover on errors.
	EnableAutoFailover bool
	// EnableFailback enables automatic failback to primary.
	EnableFailback bool
	// FailbackCooldown is the wait time before attempting failback.
	FailbackCooldown time.Duration
	// HealthCheckInterval is how often to check provider health.
	HealthCheckInterval time.Duration
	// MaxFailoverAttempts is max consecutive failovers before giving up.
	MaxFailoverAttempts int
	// Logger for failover events.
	Logger *slog.Logger
	// HistorySize is the number of failover records to keep.
	HistorySize int
}

FailoverConfig holds configuration for multi-provider failover.

func DefaultFailoverConfig added in v0.18.0

func DefaultFailoverConfig() FailoverConfig

DefaultFailoverConfig returns sensible defaults.

type FailoverHistory added in v0.18.0

type FailoverHistory struct {
	// contains filtered or unexported fields
}

FailoverHistory maintains a circular buffer of failover records.

func NewFailoverHistory added in v0.18.0

func NewFailoverHistory(size int) *FailoverHistory

NewFailoverHistory creates a new failover history buffer.

func (*FailoverHistory) Add added in v0.18.0

func (h *FailoverHistory) Add(record FailoverRecord)

Add adds a failover record.

func (*FailoverHistory) GetRecent added in v0.18.0

func (h *FailoverHistory) GetRecent(count int) []FailoverRecord

GetRecent returns the most recent failover records.

type FailoverManager added in v0.18.0

type FailoverManager struct {
	// contains filtered or unexported fields
}

FailoverManager manages multi-provider failover logic.

func NewFailoverManager added in v0.18.0

func NewFailoverManager(config FailoverConfig) *FailoverManager

NewFailoverManager creates a new failover manager.

func (*FailoverManager) Close added in v0.18.0

func (fm *FailoverManager) Close()

Close stops the health check goroutine and cleans up resources. Should be called when the FailoverManager is no longer needed.

func (*FailoverManager) ExecuteWithFailover added in v0.18.0

func (fm *FailoverManager) ExecuteWithFailover(ctx context.Context, fn func(provider *ProviderConfig) error) error

ExecuteWithFailover executes a function with automatic failover.

func (*FailoverManager) GetCurrentProvider added in v0.18.0

func (fm *FailoverManager) GetCurrentProvider() *ProviderConfig

GetCurrentProvider returns the current active provider.

func (*FailoverManager) GetStats added in v0.18.0

func (fm *FailoverManager) GetStats() FailoverStats

GetStats returns failover statistics.

func (*FailoverManager) ManualFailover added in v0.18.0

func (fm *FailoverManager) ManualFailover(providerName string) error

ManualFailover manually switches to a specific provider.

func (*FailoverManager) Reset added in v0.18.0

func (fm *FailoverManager) Reset()

Reset resets failover state to initial configuration.

func (*FailoverManager) SetCurrentProvider added in v0.18.0

func (fm *FailoverManager) SetCurrentProvider(name string)

SetCurrentProvider sets the current provider (for testing).

func (*FailoverManager) SetLastFailoverTime added in v0.18.0

func (fm *FailoverManager) SetLastFailoverTime(t time.Time)

SetLastFailoverTime sets the last failover time (for testing).

type FailoverRecord added in v0.18.0

type FailoverRecord struct {
	Timestamp    time.Time
	FromProvider string
	ToProvider   string
	Reason       string
	Duration     time.Duration
	Success      bool
}

FailoverRecord represents a failover event record.

type FailoverStats added in v0.18.0

type FailoverStats struct {
	IsActive           bool
	CurrentProvider    string
	FailoverCount      int32
	LastFailoverTime   time.Time
	HealthyProviders   []string
	UnhealthyProviders []string
	RecentFailovers    []FailoverRecord
}

FailoverStats holds failover statistics.

type HealthChecker added in v0.18.0

type HealthChecker interface {
	HealthCheck(ctx context.Context) HealthStatus
}

HealthChecker provides health check capability.

type HealthMonitor added in v0.18.0

type HealthMonitor struct {
	// contains filtered or unexported fields
}

HealthMonitor wraps a client with health monitoring capabilities.

func NewHealthMonitor added in v0.18.0

func NewHealthMonitor(client interface {
	Chat(ctx context.Context, prompt string) (string, error)
	Analyze(ctx context.Context, prompt string, target any) error
	ChatStream(ctx context.Context, prompt string) (<-chan string, error)
	HealthCheck(ctx context.Context) HealthStatus
}, checkInterval time.Duration) *HealthMonitor

NewHealthMonitor creates a health monitor wrapper.

func (*HealthMonitor) Analyze added in v0.18.0

func (h *HealthMonitor) Analyze(ctx context.Context, prompt string, target any) error

Analyze delegates to the underlying client.

func (*HealthMonitor) Chat added in v0.18.0

func (h *HealthMonitor) Chat(ctx context.Context, prompt string) (string, error)

Chat delegates to the underlying client.

func (*HealthMonitor) ChatStream added in v0.18.0

func (h *HealthMonitor) ChatStream(ctx context.Context, prompt string) (<-chan string, error)

ChatStream delegates to the underlying client.

func (*HealthMonitor) HealthCheck added in v0.18.0

func (h *HealthMonitor) HealthCheck(ctx context.Context) HealthStatus

HealthCheck performs a health check with optional caching.

func (*HealthMonitor) IsHealthy added in v0.18.0

func (h *HealthMonitor) IsHealthy() bool

IsHealthy returns true if the last health check passed.

func (*HealthMonitor) LastHealthCheck added in v0.18.0

func (h *HealthMonitor) LastHealthCheck() time.Time

LastHealthCheck returns the time of the last health check.

type HealthStatus added in v0.18.0

type HealthStatus struct {
	Healthy   bool
	Provider  string
	Model     string
	LatencyMs int64
	Error     string
}

HealthStatus represents the health status of an LLM client. Exported for use by the brain package.

type LLMClient added in v0.22.0

type LLMClient interface {
	Chat(ctx context.Context, prompt string) (string, error)
	Analyze(ctx context.Context, prompt string, target any) error
	ChatStream(ctx context.Context, prompt string) (<-chan string, error)
	HealthCheck(ctx context.Context) HealthStatus
}

LLMClient defines the interface for LLM interactions. All client wrappers must implement this interface.

func DevelopmentClient added in v0.22.0

func DevelopmentClient(apiKey, model string) (LLMClient, error)

DevelopmentClient creates a development client with minimal overhead. Only metrics are enabled for debugging purposes.

func DevelopmentClientWithEndpoint added in v0.22.0

func DevelopmentClientWithEndpoint(apiKey, endpoint, model string) (LLMClient, error)

DevelopmentClientWithEndpoint creates a development client with custom endpoint.

func HighThroughputClient added in v0.22.0

func HighThroughputClient(apiKey, model string) (LLMClient, error)

HighThroughputClient creates a client optimized for high-throughput scenarios. Aggressive caching, high rate limits, and no retries for speed.

func MinimalClient added in v0.22.0

func MinimalClient(apiKey, model string) (LLMClient, error)

MinimalClient creates a bare-bones client with no middleware. Useful for simple use cases or custom configurations.

func ProductionClient added in v0.22.0

func ProductionClient(apiKey, model string) (LLMClient, error)

ProductionClient creates a production-ready LLM client with all capabilities enabled. Suitable for production workloads requiring reliability, observability, and protection.

func ProductionClientWithEndpoint added in v0.22.0

func ProductionClientWithEndpoint(apiKey, endpoint, model string) (LLMClient, error)

ProductionClientWithEndpoint creates a production-ready client with custom endpoint.

func ReliableClient added in v0.22.0

func ReliableClient(apiKey, model string) (LLMClient, error)

ReliableClient creates a client optimized for reliability. Aggressive retries, circuit breaker, and conservative rate limiting.

func TestingClient added in v0.22.0

func TestingClient(apiKey, model string) (LLMClient, error)

TestingClient creates a client optimized for testing scenarios. Includes cache for deterministic responses and minimal rate limiting.

type MetricsClient added in v0.22.0

type MetricsClient struct {
	// contains filtered or unexported fields
}

MetricsClient wraps an LLM client with metrics collection.

func NewMetricsClient added in v0.22.0

func NewMetricsClient(client LLMClient, metrics *MetricsCollector, model string) *MetricsClient

NewMetricsClient creates a new metrics-enabled client wrapper.

func (*MetricsClient) Analyze added in v0.22.0

func (m *MetricsClient) Analyze(ctx context.Context, prompt string, target any) error

Analyze implements the Analyze method with metrics collection.

func (*MetricsClient) Chat added in v0.22.0

func (m *MetricsClient) Chat(ctx context.Context, prompt string) (string, error)

Chat implements the Chat method with metrics collection.

func (*MetricsClient) ChatStream added in v0.22.0

func (m *MetricsClient) ChatStream(ctx context.Context, prompt string) (<-chan string, error)

ChatStream implements the ChatStream method with metrics collection.

func (*MetricsClient) Client added in v0.22.0

func (m *MetricsClient) Client() LLMClient

Client returns the underlying client for component extraction.

func (*MetricsClient) GetMetrics added in v0.22.0

func (m *MetricsClient) GetMetrics() *MetricsCollector

GetMetrics returns the underlying metrics collector for stats retrieval.

func (*MetricsClient) HealthCheck added in v0.22.0

func (m *MetricsClient) HealthCheck(ctx context.Context) HealthStatus

HealthCheck implements the HealthCheck method.

type MetricsCollector added in v0.18.0

type MetricsCollector struct {
	// contains filtered or unexported fields
}

MetricsCollector collects and exports LLM metrics via OpenTelemetry.

func NewMetricsCollector added in v0.18.0

func NewMetricsCollector(config MetricsConfig) *MetricsCollector

NewMetricsCollector creates a new metrics collector.

func (*MetricsCollector) EndRequest added in v0.18.0

func (mc *MetricsCollector) EndRequest()

EndRequest decrements the active request counter.

func (*MetricsCollector) GetStats added in v0.18.0

func (mc *MetricsCollector) GetStats() MetricsStats

GetStats returns current metrics statistics.

func (*MetricsCollector) RecordRequest added in v0.18.0

func (mc *MetricsCollector) RecordRequest(
	model string,
	scenario string,
	inputTokens int64,
	outputTokens int64,
	cost float64,
	latencyMs float64,
	err error,
)

RecordRequest records a completed request with metrics.

func (*MetricsCollector) RecordRoutingDecision added in v0.18.0

func (mc *MetricsCollector) RecordRoutingDecision(scenario Scenario, strategy RouteStrategy, model string)

RecordRoutingDecision records a routing decision.

func (*MetricsCollector) StartRequest added in v0.18.0

func (mc *MetricsCollector) StartRequest()

StartRequest increments the active request counter.

type MetricsConfig added in v0.18.0

type MetricsConfig struct {
	// Enabled enables metrics collection.
	Enabled bool
	// ServiceName is the name for the metrics service.
	ServiceName string
	// MaxLatencySamples is the max number of latency samples to keep locally.
	MaxLatencySamples int
}

MetricsConfig holds configuration for metrics collection.

func DefaultMetricsConfig added in v0.22.0

func DefaultMetricsConfig() MetricsConfig

DefaultMetricsConfig returns default metrics configuration.

type MetricsStats added in v0.18.0

type MetricsStats struct {
	TotalRequests     int64
	TotalInputTokens  int64
	TotalOutputTokens int64
	TotalCost         float64
	TotalErrors       int64
	ErrorRate         float64
	AvgLatencyMs      float64
	ActiveRequests    int64
}

MetricsStats represents aggregated metrics statistics.

type ModelConfig added in v0.18.0

type ModelConfig struct {
	// Name is the model identifier (e.g., "gpt-4o-mini", "qwen-plus").
	Name string
	// Provider is the provider name (e.g., "openai", "dashscope").
	Provider string
	// APIKey is the API key for this model.
	APIKey string
	// Endpoint is the optional custom endpoint.
	Endpoint string
	// CostPer1KInput is the cost per 1K input tokens (USD).
	CostPer1KInput float64
	// CostPer1KOutput is the cost per 1K output tokens (USD).
	CostPer1KOutput float64
	// AvgLatencyMs is the average latency in milliseconds.
	AvgLatencyMs int64
	// MaxTokens is the maximum context window size.
	MaxTokens int
	// Enabled indicates if this model is available for routing.
	Enabled bool
}

ModelConfig represents configuration for a single model.

type ModelPricing added in v0.18.0

type ModelPricing struct {
	ModelName       string
	Provider        string
	CostPer1KInput  float64 // USD per 1K input tokens
	CostPer1KOutput float64 // USD per 1K output tokens
	CostPer1KCache  float64 // USD per 1K cached input tokens (if applicable)
	Currency        string
	EffectiveDate   time.Time
}

ModelPricing represents pricing for a model.

func DefaultModelPricing added in v0.18.0

func DefaultModelPricing() []ModelPricing

DefaultModelPricing returns default pricing for common models.

type ModelUsage added in v0.18.0

type ModelUsage struct {
	ModelName    string
	InputTokens  int64
	OutputTokens int64
	Cost         float64
	RequestCount int64
}

ModelUsage tracks usage for a specific model within a session.

type ObservableClient added in v0.22.0

type ObservableClient interface {
	LLMClient

	// Component access
	GetMetricsStats() MetricsStats
	GetCircuitBreakerStats() CircuitBreakerStats
	GetRateLimitStats() RateLimitStats
	GetCacheStats() CacheStats

	// Unified health check
	GetClientHealth(ctx context.Context) ClientHealth
}

ObservableClient extends LLMClient with observability capabilities. It provides unified access to internal component statistics and health.

func AsObservable added in v0.22.0

func AsObservable(client LLMClient) ObservableClient

AsObservable converts an LLMClient to an ObservableClient. It returns the client directly if it already implements ObservableClient, or wraps it otherwise.

func NewObservableClient added in v0.22.0

func NewObservableClient(client LLMClient) ObservableClient

NewObservableClient creates an observable client wrapper. It extracts internal components from the client chain.

type ObservableClientImpl added in v0.22.0

type ObservableClientImpl struct {
	// contains filtered or unexported fields
}

ObservableClientImpl wraps an LLMClient with observability capabilities.

func (*ObservableClientImpl) Analyze added in v0.22.0

func (o *ObservableClientImpl) Analyze(ctx context.Context, prompt string, target any) error

Analyze delegates to the underlying client.

func (*ObservableClientImpl) Chat added in v0.22.0

func (o *ObservableClientImpl) Chat(ctx context.Context, prompt string) (string, error)

Chat delegates to the underlying client.

func (*ObservableClientImpl) ChatStream added in v0.22.0

func (o *ObservableClientImpl) ChatStream(ctx context.Context, prompt string) (<-chan string, error)

ChatStream delegates to the underlying client.

func (*ObservableClientImpl) GetCacheStats added in v0.22.0

func (o *ObservableClientImpl) GetCacheStats() CacheStats

GetCacheStats returns cache statistics if available.

func (*ObservableClientImpl) GetCircuitBreakerStats added in v0.22.0

func (o *ObservableClientImpl) GetCircuitBreakerStats() CircuitBreakerStats

GetCircuitBreakerStats returns circuit breaker statistics if available.

func (*ObservableClientImpl) GetClientHealth added in v0.22.0

func (o *ObservableClientImpl) GetClientHealth(ctx context.Context) ClientHealth

GetClientHealth returns comprehensive health information.

func (*ObservableClientImpl) GetMetricsStats added in v0.22.0

func (o *ObservableClientImpl) GetMetricsStats() MetricsStats

GetMetricsStats returns metrics statistics if available.

func (*ObservableClientImpl) GetRateLimitStats added in v0.22.0

func (o *ObservableClientImpl) GetRateLimitStats() RateLimitStats

GetRateLimitStats returns rate limit statistics if available.

func (*ObservableClientImpl) HealthCheck added in v0.22.0

func (o *ObservableClientImpl) HealthCheck(ctx context.Context) HealthStatus

HealthCheck delegates to the underlying client.

type OpenAIClient

type OpenAIClient struct {
	// contains filtered or unexported fields
}

OpenAIClient implements OpenAI-compatible LLM interactions. It can be used for OpenAI, DeepSeek, Groq, etc.

func NewOpenAIClient

func NewOpenAIClient(apiKey, endpoint, model string, logger *slog.Logger) *OpenAIClient

NewOpenAIClient creates a new OpenAI compatible client.

func (*OpenAIClient) Analyze

func (c *OpenAIClient) Analyze(ctx context.Context, prompt string, target any) error

Analyze requests JSON formatted output from the model. It uses "ResponseFormat: {Type: JSON_OBJECT}" to ensure model compatibility for structured reasoning.

func (*OpenAIClient) Chat

func (c *OpenAIClient) Chat(ctx context.Context, prompt string) (string, error)

Chat generates a simple plain text completion.

func (*OpenAIClient) ChatStream added in v0.18.0

func (c *OpenAIClient) ChatStream(ctx context.Context, prompt string) (<-chan string, error)

ChatStream returns a channel that streams tokens as they are generated. The channel is closed when the stream completes or an error occurs.

func (*OpenAIClient) HealthCheck added in v0.18.0

func (c *OpenAIClient) HealthCheck(ctx context.Context) HealthStatus

HealthCheck performs a simple health check by making a minimal API call.

type Priority added in v0.18.0

type Priority int

Priority defines request priority levels.

const (
	// PriorityHigh - High priority (critical requests).
	PriorityHigh Priority = 1
	// PriorityMedium - Medium priority (normal requests).
	PriorityMedium Priority = 2
	// PriorityLow - Low priority (can be dropped).
	PriorityLow Priority = 3
)

func (Priority) String added in v0.18.0

func (p Priority) String() string

String returns string representation of priority.

type PriorityClient added in v0.18.0

type PriorityClient struct {
	// contains filtered or unexported fields
}

PriorityClient wraps a scheduler with execution capabilities.

func NewPriorityClient added in v0.18.0

func NewPriorityClient(scheduler *PriorityScheduler, timeout time.Duration, logger *slog.Logger) *PriorityClient

NewPriorityClient creates a new priority client.

func (*PriorityClient) ProcessAll added in v0.18.0

func (pc *PriorityClient) ProcessAll(ctx context.Context) error

ProcessAll processes all requests in the queue.

func (*PriorityClient) ProcessNext added in v0.18.0

func (pc *PriorityClient) ProcessNext(ctx context.Context) error

ProcessNext processes the next request in the queue.

func (*PriorityClient) Submit added in v0.18.0

func (pc *PriorityClient) Submit(ctx context.Context, id string, priority Priority, execute func() error) error

Submit submits a request for execution.

func (*PriorityClient) SubmitWithResult added in v0.18.0

func (pc *PriorityClient) SubmitWithResult(ctx context.Context, id string, priority Priority, execute func() error) (<-chan error, error)

SubmitWithResult submits a request and returns the result.

type PriorityConfig added in v0.18.0

type PriorityConfig struct {
	// MaxQueueSize is the maximum queue size.
	MaxQueueSize int
	// EnableLowPriorityDrop enables dropping low priority requests when queue is full.
	EnableLowPriorityDrop bool
	// HighPriorityReserve reserves queue slots for high priority.
	HighPriorityReserve int
	// MaxWaitTime is the maximum wait time for requests.
	MaxWaitTime time.Duration
	// Logger for queue events.
	Logger *slog.Logger
}

PriorityConfig holds configuration for priority queue.

func DefaultPriorityConfig added in v0.18.0

func DefaultPriorityConfig() PriorityConfig

DefaultPriorityConfig returns sensible defaults.

type PriorityQueue added in v0.18.0

type PriorityQueue struct {
	// contains filtered or unexported fields
}

PriorityQueue implements a priority queue for requests.

func (*PriorityQueue) Len added in v0.18.0

func (pq *PriorityQueue) Len() int

Len returns the number of items in the queue.

func (*PriorityQueue) Less added in v0.18.0

func (pq *PriorityQueue) Less(i, j int) bool

Less compares two items (lower priority value = higher priority).

func (*PriorityQueue) Peek added in v0.18.0

func (pq *PriorityQueue) Peek() *PriorityRequest

Peek returns the highest priority item without removing it.

func (*PriorityQueue) Pop added in v0.18.0

func (pq *PriorityQueue) Pop() interface{}

Pop removes and returns the highest priority item.

func (*PriorityQueue) Push added in v0.18.0

func (pq *PriorityQueue) Push(x interface{})

Push adds an item to the queue.

func (*PriorityQueue) Swap added in v0.18.0

func (pq *PriorityQueue) Swap(i, j int)

Swap swaps two items.

type PriorityRequest added in v0.18.0

type PriorityRequest struct {
	ID        string
	Priority  Priority
	Context   context.Context
	Execute   func() error
	CreatedAt time.Time
	Deadline  time.Time
	MaxWait   time.Duration
	// contains filtered or unexported fields
}

PriorityRequest represents a request in the priority queue.

type PriorityScheduler added in v0.18.0

type PriorityScheduler struct {
	// contains filtered or unexported fields
}

PriorityScheduler manages priority-based request scheduling.

func NewPriorityScheduler added in v0.18.0

func NewPriorityScheduler(config PriorityConfig) *PriorityScheduler

NewPriorityScheduler creates a new priority scheduler.

func (*PriorityScheduler) Clear added in v0.18.0

func (ps *PriorityScheduler) Clear()

Clear clears all items from the queue.

func (*PriorityScheduler) Dequeue added in v0.18.0

func (ps *PriorityScheduler) Dequeue(ctx context.Context) (*PriorityRequest, error)

Dequeue removes and returns the highest priority request.

func (*PriorityScheduler) Enqueue added in v0.18.0

func (ps *PriorityScheduler) Enqueue(ctx context.Context, id string, priority Priority, execute func() error, maxWait time.Duration) error

Enqueue adds a request to the priority queue.

func (*PriorityScheduler) GetStats added in v0.18.0

func (ps *PriorityScheduler) GetStats() PriorityStats

GetStats returns scheduler statistics.

func (*PriorityScheduler) Shutdown added in v0.18.0

func (ps *PriorityScheduler) Shutdown()

Shutdown gracefully shuts down the scheduler.

func (*PriorityScheduler) Size added in v0.18.0

func (ps *PriorityScheduler) Size() int

Size returns the current queue size.

func (*PriorityScheduler) TryDequeue added in v0.18.0

func (ps *PriorityScheduler) TryDequeue() *PriorityRequest

TryDequeue tries to dequeue without blocking.

type PriorityStats added in v0.18.0

type PriorityStats struct {
	QueueSize       int32
	MaxQueueSize    int
	Dropped         int64
	Processed       int64
	HighProcessed   int64
	MediumProcessed int64
	LowProcessed    int64
	HighDropped     int64
	MediumDropped   int64
	LowDropped      int64
	IsShutdown      bool
}

PriorityStats holds scheduler statistics.

type ProviderConfig added in v0.18.0

type ProviderConfig struct {
	// Name is the provider identifier (e.g., "openai", "dashscope", "anthropic").
	Name string
	// APIKey is the API key for this provider.
	APIKey string
	// Endpoint is the optional custom endpoint.
	Endpoint string
	// Models is the list of models available from this provider.
	Models []string
	// Priority is the failover priority (1 = primary, higher = backup).
	Priority int
	// Enabled indicates if this provider is available.
	Enabled bool
	// Timeout is the request timeout for this provider.
	Timeout time.Duration
	// MaxRetries is the max retry attempts for this provider.
	MaxRetries int
}

ProviderConfig represents configuration for an LLM provider.

type RateLimitConfig added in v0.18.0

type RateLimitConfig struct {
	// RequestsPerSecond is the sustained request rate.
	RequestsPerSecond float64
	// BurstSize is the maximum burst size.
	BurstSize int
	// MaxQueueSize is the maximum number of waiting requests.
	MaxQueueSize int
	// QueueTimeout is the maximum time a request can wait in queue.
	QueueTimeout time.Duration
	// PerModel enables per-model rate limiting.
	PerModel bool
}

RateLimitConfig holds configuration for rate limiting.

type RateLimitStats added in v0.18.0

type RateLimitStats struct {
	TotalRequests    int64
	QueuedRequests   int64
	RejectedRequests int64
	AvgWaitTimeMs    float64
	LastReset        time.Time
}

RateLimitStats holds rate limiting statistics.

type RateLimitedClient added in v0.18.0

type RateLimitedClient struct {
	// contains filtered or unexported fields
}

RateLimitedClient wraps a client with rate limiting.

func NewRateLimitedClient added in v0.18.0

func NewRateLimitedClient(client LLMClient, limiter *RateLimiter) *RateLimitedClient

NewRateLimitedClient creates a new rate-limited client wrapper.

func (*RateLimitedClient) Analyze added in v0.18.0

func (c *RateLimitedClient) Analyze(ctx context.Context, prompt string, target any) error

Analyze implements rate-limited analyze.

func (*RateLimitedClient) Chat added in v0.18.0

func (c *RateLimitedClient) Chat(ctx context.Context, prompt string) (string, error)

Chat implements rate-limited chat.

func (*RateLimitedClient) ChatStream added in v0.18.0

func (c *RateLimitedClient) ChatStream(ctx context.Context, prompt string) (<-chan string, error)

ChatStream implements rate-limited streaming.

func (*RateLimitedClient) Client added in v0.22.0

func (c *RateLimitedClient) Client() LLMClient

Client returns the underlying client for component extraction.

func (*RateLimitedClient) HealthCheck added in v0.18.0

func (c *RateLimitedClient) HealthCheck(ctx context.Context) HealthStatus

HealthCheck implements rate-limited health check.

func (*RateLimitedClient) SetModel added in v0.18.0

func (c *RateLimitedClient) SetModel(model string)

SetModel sets the model for per-model rate limiting.

type RateLimiter added in v0.18.0

type RateLimiter struct {
	// contains filtered or unexported fields
}

RateLimiter provides rate limiting with token bucket algorithm.

func NewRateLimiter added in v0.18.0

func NewRateLimiter(config RateLimitConfig) *RateLimiter

NewRateLimiter creates a new rate limiter.

func (*RateLimiter) Allow added in v0.18.0

func (rl *RateLimiter) Allow() bool

Allow checks if a request can proceed immediately.

func (*RateLimiter) Close added in v0.18.0

func (rl *RateLimiter) Close()

Close closes the rate limiter and stops queue processing.

func (*RateLimiter) GetStats added in v0.18.0

func (rl *RateLimiter) GetStats() RateLimitStats

GetStats returns current rate limiting statistics.

func (*RateLimiter) Remaining added in v0.18.0

func (rl *RateLimiter) Remaining() int

Remaining returns the number of requests remaining in the current burst.

func (*RateLimiter) Reset added in v0.18.0

func (rl *RateLimiter) Reset()

Reset resets the rate limiter immediately.

func (*RateLimiter) ResetStats added in v0.18.0

func (rl *RateLimiter) ResetStats()

ResetStats resets the statistics.

func (*RateLimiter) SetModelRate added in v0.18.0

func (rl *RateLimiter) SetModelRate(model string, requestsPerSecond float64, burstSize int)

SetModelRate sets rate limit for a specific model.

func (*RateLimiter) SetRate added in v0.18.0

func (rl *RateLimiter) SetRate(requestsPerSecond float64, burstSize int)

SetRate updates the rate limit dynamically.

func (*RateLimiter) TryWithRateLimit added in v0.18.0

func (rl *RateLimiter) TryWithRateLimit(ctx context.Context, model string, fn func() error) error

TryWithRateLimit attempts to execute with rate limiting and returns immediately if the rate limit is exceeded.

func (*RateLimiter) Wait added in v0.18.0

func (rl *RateLimiter) Wait(ctx context.Context) error

Wait waits until a request can proceed or context is cancelled.

func (*RateLimiter) WaitModel added in v0.18.0

func (rl *RateLimiter) WaitModel(ctx context.Context, model string) error

WaitModel waits for a specific model's rate limit.

func (*RateLimiter) WithRateLimit added in v0.18.0

func (rl *RateLimiter) WithRateLimit(ctx context.Context, model string, fn func() error) error

WithRateLimit wraps a function with rate limiting.

type RequestTimer added in v0.18.0

type RequestTimer struct {
	// contains filtered or unexported fields
}

RequestTimer helps track request timing.

func NewRequestTimer added in v0.18.0

func NewRequestTimer(metrics *MetricsCollector, model, scenario string) *RequestTimer

NewRequestTimer creates a new request timer.

func (*RequestTimer) Record added in v0.18.0

func (rt *RequestTimer) Record(inputTokens, outputTokens int64, cost float64, err error)

Record completes the timer and records metrics.

type RetryClient added in v0.18.0

type RetryClient struct {
	// contains filtered or unexported fields
}

RetryClient wraps an LLM client with retry and backoff logic.

func NewRetryClient added in v0.18.0

func NewRetryClient(client LLMClient, maxRetries, retryMinWaitMs, retryMaxWaitMs int) *RetryClient

NewRetryClient creates a new retry client wrapper. Set maxRetries to 0 to disable retries.

func (*RetryClient) Analyze added in v0.18.0

func (r *RetryClient) Analyze(ctx context.Context, prompt string, target any) error

Analyze implements the Analyze method with retry logic.

func (*RetryClient) Chat added in v0.18.0

func (r *RetryClient) Chat(ctx context.Context, prompt string) (string, error)

Chat implements the Chat method with retry logic.

func (*RetryClient) ChatStream added in v0.18.0

func (r *RetryClient) ChatStream(ctx context.Context, prompt string) (<-chan string, error)

ChatStream implements the ChatStream method with retry logic.

func (*RetryClient) HealthCheck added in v0.18.0

func (r *RetryClient) HealthCheck(ctx context.Context) HealthStatus

HealthCheck delegates to the underlying client.

type RouteStrategy added in v0.18.0

type RouteStrategy string

RouteStrategy defines the routing strategy for model selection.

const (
	// StrategyCostPriority routes to the cheapest model that meets requirements.
	StrategyCostPriority RouteStrategy = "cost_priority"
	// StrategyLatencyPriority routes to the fastest model.
	StrategyLatencyPriority RouteStrategy = "latency_priority"
	// StrategyQualityPriority routes to the highest quality model.
	StrategyQualityPriority RouteStrategy = "quality_priority"
	// StrategyBalanced routes with balanced cost/quality tradeoff.
	StrategyBalanced RouteStrategy = "balanced"
)

type Router added in v0.18.0

type Router struct {
	// contains filtered or unexported fields
}

Router provides dynamic model routing based on scenario and strategy.

func NewRouter added in v0.18.0

func NewRouter(config RouterConfig, metrics *MetricsCollector) *Router

NewRouter creates a new model router.

func (*Router) AddModel added in v0.18.0

func (r *Router) AddModel(model ModelConfig)

AddModel dynamically adds a model to the router.

func (*Router) DetectScenario added in v0.18.0

func (r *Router) DetectScenario(prompt string) Scenario

DetectScenario infers the scenario from the prompt content.

func (*Router) GetDefaultStrategy added in v0.18.0

func (r *Router) GetDefaultStrategy() RouteStrategy

GetDefaultStrategy returns the default routing strategy.

func (*Router) GetModels added in v0.18.0

func (r *Router) GetModels() []ModelConfig

GetModels returns all configured models.

func (*Router) RemoveModel added in v0.18.0

func (r *Router) RemoveModel(name string)

RemoveModel removes a model by name.

func (*Router) SelectModel added in v0.18.0

func (r *Router) SelectModel(ctx context.Context, scenario Scenario, strategy RouteStrategy) (*ModelConfig, error)

SelectModel selects the best model for the given scenario and strategy.

type RouterConfig added in v0.18.0

type RouterConfig struct {
	// DefaultStrategy is the default routing strategy.
	DefaultStrategy RouteStrategy
	// Models is the list of available models.
	Models []ModelConfig
	// ScenarioModelMap maps scenarios to preferred model names.
	ScenarioModelMap map[Scenario]string
	// FallbackModel is used when primary model fails.
	FallbackModel string
	// Logger for routing decisions.
	Logger *slog.Logger
}

RouterConfig holds the configuration for the model router.

type Scenario added in v0.18.0

type Scenario string

Scenario defines the use case scenario for routing.

const (
	// ScenarioChat is for conversational chat.
	ScenarioChat Scenario = "chat"
	// ScenarioAnalyze is for structured analysis/extraction.
	ScenarioAnalyze Scenario = "analyze"
	// ScenarioCode is for code generation/review.
	ScenarioCode Scenario = "code"
	// ScenarioReasoning is for complex reasoning tasks.
	ScenarioReasoning Scenario = "reasoning"
)

type SessionCost added in v0.18.0

type SessionCost struct {
	SessionID      string
	TotalInput     int64
	TotalOutput    int64
	TotalCost      float64
	RequestCount   int64
	FirstRequest   time.Time
	LastRequest    time.Time
	ModelBreakdown map[string]*ModelUsage
	BudgetLimit    float64 // Optional budget limit
	BudgetAlerted  bool    // Whether budget alert has been triggered
}

SessionCost tracks costs for a single session.

type TokenUsage added in v0.18.0

type TokenUsage struct {
	InputTokens  int
	OutputTokens int
	TotalTokens  int
	Model        string
	Timestamp    time.Time
}

TokenUsage represents token usage for a single request.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL