llm

package
v0.18.2 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 5, 2026 License: MIT Imports: 18 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type AtomicRateLimitStats added in v0.18.0

type AtomicRateLimitStats struct {
	TotalRequests    atomic.Int64
	QueuedRequests   atomic.Int64
	RejectedRequests atomic.Int64
	AvgWaitTimeMs    atomic.Float64
}

AtomicRateLimitStats holds atomic stats for concurrent access.

type BudgetAlert added in v0.18.0

type BudgetAlert struct {
	Timestamp   time.Time
	Period      BudgetPeriod
	CurrentCost float64
	Limit       float64
	Percentage  float64
	Message     string
	IsHardLimit bool
}

BudgetAlert represents a budget alert event.

type BudgetAlertThreshold added in v0.18.0

type BudgetAlertThreshold struct {
	// Percentage is the budget percentage (e.g., 80 for 80%).
	Percentage float64
	// Message is the alert message template.
	Message string
}

BudgetAlertThreshold defines alert thresholds.

type BudgetConfig added in v0.18.0

type BudgetConfig struct {
	// Period is the budget period (daily/weekly/monthly/session).
	Period BudgetPeriod
	// Limit is the budget limit in USD.
	Limit float64
	// AlertThresholds is the list of alert thresholds.
	AlertThresholds []BudgetAlertThreshold
	// EnableHardLimit enables hard limit (reject requests over budget).
	EnableHardLimit bool
	// EnableSoftLimit enables soft limit (warn but allow).
	EnableSoftLimit bool
	// Logger for budget events.
	Logger *slog.Logger
}

BudgetConfig holds configuration for budget control.

func DefaultBudgetConfig added in v0.18.0

func DefaultBudgetConfig() BudgetConfig

DefaultBudgetConfig returns sensible defaults.

type BudgetManager added in v0.18.0

type BudgetManager struct {
	// contains filtered or unexported fields
}

BudgetManager manages multiple budget trackers (e.g., per user/session).

func NewBudgetManager added in v0.18.0

func NewBudgetManager(config BudgetConfig) *BudgetManager

NewBudgetManager creates a new budget manager.

func (*BudgetManager) GetAllTrackers added in v0.18.0

func (bm *BudgetManager) GetAllTrackers() map[string]*BudgetTracker

GetAllTrackers returns all budget trackers.

func (*BudgetManager) GetGlobalStats added in v0.18.0

func (bm *BudgetManager) GetGlobalStats() BudgetStats

GetGlobalStats returns global budget statistics.

func (*BudgetManager) GetTracker added in v0.18.0

func (bm *BudgetManager) GetTracker(sessionID string) *BudgetTracker

GetTracker gets or creates a budget tracker for a session.

func (*BudgetManager) RemoveTracker added in v0.18.0

func (bm *BudgetManager) RemoveTracker(sessionID string)

RemoveTracker removes a budget tracker.

func (*BudgetManager) ResetAll added in v0.18.0

func (bm *BudgetManager) ResetAll()

ResetAll resets all budget trackers.

func (*BudgetManager) TrackRequest added in v0.18.0

func (bm *BudgetManager) TrackRequest(sessionID string, cost float64) error

TrackRequest tracks a request across all trackers.

type BudgetPeriod added in v0.18.0

type BudgetPeriod string

BudgetPeriod defines the budget period type.

const (
	// BudgetDaily - Daily budget reset.
	BudgetDaily BudgetPeriod = "daily"
	// BudgetWeekly - Weekly budget reset (Monday).
	BudgetWeekly BudgetPeriod = "weekly"
	// BudgetMonthly - Monthly budget reset (1st).
	BudgetMonthly BudgetPeriod = "monthly"
	// BudgetSession - Session-based budget (no auto-reset).
	BudgetSession BudgetPeriod = "session"
)

type BudgetStats added in v0.18.0

type BudgetStats struct {
	CurrentCost     float64
	Limit           float64
	Remaining       float64
	PercentageUsed  float64
	RequestCount    int64
	PeriodStart     time.Time
	PeriodEnd       time.Time
	IsExceeded      bool
	AlertsTriggered []float64
}

BudgetStats holds budget statistics.

type BudgetTracker added in v0.18.0

type BudgetTracker struct {
	// contains filtered or unexported fields
}

BudgetTracker tracks budget usage and enforces limits.

func NewBudgetTracker added in v0.18.0

func NewBudgetTracker(config BudgetConfig, sessionID string) *BudgetTracker

NewBudgetTracker creates a new budget tracker.

func (*BudgetTracker) CheckAlerts added in v0.18.0

func (bt *BudgetTracker) CheckAlerts()

CheckAlerts checks and triggers budget alerts (public for testing).

func (*BudgetTracker) CheckBudget added in v0.18.0

func (bt *BudgetTracker) CheckBudget(estimatedCost float64) (bool, float64, error)

CheckBudget checks if a request is within budget. Returns (allowed, cost, error).

func (*BudgetTracker) GetRemaining added in v0.18.0

func (bt *BudgetTracker) GetRemaining() float64

GetRemaining returns the remaining budget.

func (*BudgetTracker) GetStats added in v0.18.0

func (bt *BudgetTracker) GetStats() BudgetStats

GetStats returns budget statistics.

func (*BudgetTracker) IsExceeded added in v0.18.0

func (bt *BudgetTracker) IsExceeded() bool

IsExceeded returns true if budget is exceeded.

func (*BudgetTracker) Reset added in v0.18.0

func (bt *BudgetTracker) Reset()

Reset manually resets the budget tracker.

func (*BudgetTracker) SetAlertCallback added in v0.18.0

func (bt *BudgetTracker) SetAlertCallback(callback func(alert BudgetAlert))

SetAlertCallback sets a callback function for budget alerts.

func (*BudgetTracker) SetLimit added in v0.18.0

func (bt *BudgetTracker) SetLimit(limit float64)

SetLimit updates the budget limit.

func (*BudgetTracker) TrackRequest added in v0.18.0

func (bt *BudgetTracker) TrackRequest(cost float64) error

TrackRequest tracks a request's cost against the budget.

type CacheEntry added in v0.18.0

type CacheEntry struct {
	Response string
	JSONData []byte
}

CacheEntry represents a cached response.

type CacheKey added in v0.18.0

type CacheKey struct {
	Prompt    string
	Model     string
	IsAnalyze bool
}

CacheKey represents a unique cache key for LLM requests.

type CachedClient added in v0.18.0

type CachedClient struct {
	// contains filtered or unexported fields
}

CachedClient wraps an LLM client with LRU caching.

func NewCachedClient added in v0.18.0

func NewCachedClient(client interface {
	Chat(ctx context.Context, prompt string) (string, error)
	Analyze(ctx context.Context, prompt string, target any) error
	ChatStream(ctx context.Context, prompt string) (<-chan string, error)
	HealthCheck(ctx context.Context) HealthStatus
}, cacheSize int) *CachedClient

NewCachedClient creates a new cached client wrapper. Set cacheSize to 0 to disable caching.

func (*CachedClient) Analyze added in v0.18.0

func (c *CachedClient) Analyze(ctx context.Context, prompt string, target any) error

Analyze implements the Analyze method with caching.

func (*CachedClient) CacheStats added in v0.18.0

func (c *CachedClient) CacheStats() (keys int, hits int, misses int)

CacheStats returns current cache statistics.

func (*CachedClient) Chat added in v0.18.0

func (c *CachedClient) Chat(ctx context.Context, prompt string) (string, error)

Chat implements the Chat method with caching.

func (*CachedClient) ChatStream added in v0.18.0

func (c *CachedClient) ChatStream(ctx context.Context, prompt string) (<-chan string, error)

ChatStream does not use caching (streams are not cacheable).

func (*CachedClient) ClearCache added in v0.18.0

func (c *CachedClient) ClearCache()

ClearCache clears all cached entries.

func (*CachedClient) HealthCheck added in v0.18.0

func (c *CachedClient) HealthCheck(ctx context.Context) HealthStatus

HealthCheck delegates to the underlying client.

type CircuitBreaker added in v0.18.0

type CircuitBreaker struct {
	// contains filtered or unexported fields
}

CircuitBreaker wraps gobreaker with additional HotPlex-specific features.

func NewCircuitBreaker added in v0.18.0

func NewCircuitBreaker(config CircuitBreakerConfig) *CircuitBreaker

NewCircuitBreaker creates a new circuit breaker.

func (*CircuitBreaker) Execute added in v0.18.0

func (cb *CircuitBreaker) Execute(ctx context.Context, fn func() error) error

Execute executes a function with circuit breaker protection.

func (*CircuitBreaker) ExecuteWithResult added in v0.18.0

func (cb *CircuitBreaker) ExecuteWithResult(ctx context.Context, fn func() (interface{}, error)) (interface{}, error)

ExecuteWithResult executes a function that returns a value with circuit breaker protection.

func (*CircuitBreaker) ForceClose added in v0.18.0

func (cb *CircuitBreaker) ForceClose()

ForceClose manually closes the circuit (override protection).

func (*CircuitBreaker) ForceOpen added in v0.18.0

func (cb *CircuitBreaker) ForceOpen()

ForceOpen manually opens the circuit (for maintenance/emergency).

func (*CircuitBreaker) GetFailureRate added in v0.18.0

func (cb *CircuitBreaker) GetFailureRate() float64

GetFailureRate returns the current failure rate (last interval).

func (*CircuitBreaker) GetState added in v0.18.0

func (cb *CircuitBreaker) GetState() CircuitState

GetState returns the current circuit state.

func (*CircuitBreaker) GetStats added in v0.18.0

func (cb *CircuitBreaker) GetStats() CircuitBreakerStats

GetStats returns circuit breaker statistics.

func (*CircuitBreaker) IsHealthy added in v0.18.0

func (cb *CircuitBreaker) IsHealthy() bool

IsHealthy returns true if circuit is closed or half-open.

func (*CircuitBreaker) Reset added in v0.18.0

func (cb *CircuitBreaker) Reset()

Reset manually resets the circuit breaker to closed state.

type CircuitBreakerConfig added in v0.18.0

type CircuitBreakerConfig struct {
	// Name is the circuit breaker name (for logging/metrics).
	Name string
	// MaxFailures is the number of failures before opening circuit.
	MaxFailures uint32
	// Interval is the time window for counting failures.
	Interval time.Duration
	// Timeout is how long circuit stays open before half-open.
	Timeout time.Duration
	// HalfOpenMaxRequests is max requests allowed in half-open state.
	HalfOpenMaxRequests uint32
	// SuccessThreshold is successes needed in half-open to close.
	SuccessThreshold uint32
	// Logger for circuit state changes.
	Logger *slog.Logger
}

CircuitBreakerConfig holds configuration for circuit breaker.

func DefaultCircuitBreakerConfig added in v0.18.0

func DefaultCircuitBreakerConfig() CircuitBreakerConfig

DefaultCircuitBreakerConfig returns sensible defaults.

type CircuitBreakerStats added in v0.18.0

type CircuitBreakerStats struct {
	State           CircuitState
	TotalRequests   uint64
	SuccessRequests uint64
	FailRequests    uint64
	CircuitOpens    uint64
	LastStateChange time.Time
	IsForced        bool
}

CircuitBreakerStats holds circuit breaker statistics.

type CircuitState added in v0.18.0

type CircuitState string

CircuitState represents the state of a circuit breaker.

const (
	// CircuitClosed - Normal operation, requests pass through.
	CircuitClosed CircuitState = "closed"
	// CircuitOpen - Circuit is tripped, requests fail fast.
	CircuitOpen CircuitState = "open"
	// CircuitHalfOpen - Testing if service has recovered.
	CircuitHalfOpen CircuitState = "half-open"
)

type CostCalculator added in v0.18.0

type CostCalculator struct {
	// contains filtered or unexported fields
}

CostCalculator calculates costs based on model pricing.

func NewCostCalculator added in v0.18.0

func NewCostCalculator() *CostCalculator

NewCostCalculator creates a new cost calculator.

func (*CostCalculator) AddPricing added in v0.18.0

func (cc *CostCalculator) AddPricing(pricing ModelPricing)

AddPricing adds or updates pricing for a model.

func (*CostCalculator) CalculateCost added in v0.18.0

func (cc *CostCalculator) CalculateCost(modelName string, inputTokens, outputTokens int) (float64, error)

CalculateCost calculates the cost for a request.

func (*CostCalculator) CountTokens added in v0.18.0

func (cc *CostCalculator) CountTokens(text string) int

CountTokens estimates token count from text. This is a simple approximation; for production use, consider using tiktoken.

func (*CostCalculator) CountTokensFromMessages added in v0.18.0

func (cc *CostCalculator) CountTokensFromMessages(messages []openai.ChatCompletionMessage) int

CountTokensFromMessages counts tokens from OpenAI messages.

func (*CostCalculator) GetAllSessions added in v0.18.0

func (cc *CostCalculator) GetAllSessions() map[string]*SessionCost

GetAllSessions returns all session costs.

func (*CostCalculator) GetPricing added in v0.18.0

func (cc *CostCalculator) GetPricing(modelName string) (ModelPricing, bool)

GetPricing returns pricing for a model.

func (*CostCalculator) GetSessionCost added in v0.18.0

func (cc *CostCalculator) GetSessionCost(sessionID string) (*SessionCost, bool)

GetSessionCost returns cost statistics for a session.

func (*CostCalculator) GetTotalCost added in v0.18.0

func (cc *CostCalculator) GetTotalCost() float64

GetTotalCost returns total cost across all sessions.

func (*CostCalculator) ResetSession added in v0.18.0

func (cc *CostCalculator) ResetSession(sessionID string)

ResetSession resets a session's cost tracking.

func (*CostCalculator) SetSessionBudget added in v0.18.0

func (cc *CostCalculator) SetSessionBudget(sessionID string, budget float64)

SetSessionBudget sets a budget limit for a session.

func (*CostCalculator) TrackRequest added in v0.18.0

func (cc *CostCalculator) TrackRequest(sessionID, modelName string, inputTokens, outputTokens int) (*SessionCost, float64, error)

TrackRequest tracks a request's cost for a session.

type FailoverConfig added in v0.18.0

type FailoverConfig struct {
	// Providers is the list of configured providers.
	Providers []ProviderConfig
	// EnableAutoFailover enables automatic failover on errors.
	EnableAutoFailover bool
	// EnableFailback enables automatic failback to primary.
	EnableFailback bool
	// FailbackCooldown is the wait time before attempting failback.
	FailbackCooldown time.Duration
	// HealthCheckInterval is how often to check provider health.
	HealthCheckInterval time.Duration
	// MaxFailoverAttempts is max consecutive failovers before giving up.
	MaxFailoverAttempts int
	// Logger for failover events.
	Logger *slog.Logger
	// HistorySize is the number of failover records to keep.
	HistorySize int
}

FailoverConfig holds configuration for multi-provider failover.

func DefaultFailoverConfig added in v0.18.0

func DefaultFailoverConfig() FailoverConfig

DefaultFailoverConfig returns sensible defaults.

type FailoverHistory added in v0.18.0

type FailoverHistory struct {
	// contains filtered or unexported fields
}

FailoverHistory maintains a circular buffer of failover records.

func NewFailoverHistory added in v0.18.0

func NewFailoverHistory(size int) *FailoverHistory

NewFailoverHistory creates a new failover history buffer.

func (*FailoverHistory) Add added in v0.18.0

func (h *FailoverHistory) Add(record FailoverRecord)

Add adds a failover record.

func (*FailoverHistory) GetRecent added in v0.18.0

func (h *FailoverHistory) GetRecent(count int) []FailoverRecord

GetRecent returns the most recent failover records.

type FailoverManager added in v0.18.0

type FailoverManager struct {
	// contains filtered or unexported fields
}

FailoverManager manages multi-provider failover logic.

func NewFailoverManager added in v0.18.0

func NewFailoverManager(config FailoverConfig) *FailoverManager

NewFailoverManager creates a new failover manager.

func (*FailoverManager) Close added in v0.18.0

func (fm *FailoverManager) Close()

Close stops the health check goroutine and cleans up resources. Should be called when the FailoverManager is no longer needed.

func (*FailoverManager) ExecuteWithFailover added in v0.18.0

func (fm *FailoverManager) ExecuteWithFailover(ctx context.Context, fn func(provider *ProviderConfig) error) error

ExecuteWithFailover executes a function with automatic failover.

func (*FailoverManager) GetCurrentProvider added in v0.18.0

func (fm *FailoverManager) GetCurrentProvider() *ProviderConfig

GetCurrentProvider returns the current active provider.

func (*FailoverManager) GetStats added in v0.18.0

func (fm *FailoverManager) GetStats() FailoverStats

GetStats returns failover statistics.

func (*FailoverManager) ManualFailover added in v0.18.0

func (fm *FailoverManager) ManualFailover(providerName string) error

ManualFailover manually switches to a specific provider.

func (*FailoverManager) Reset added in v0.18.0

func (fm *FailoverManager) Reset()

Reset resets failover state to initial configuration.

func (*FailoverManager) SetCurrentProvider added in v0.18.0

func (fm *FailoverManager) SetCurrentProvider(name string)

SetCurrentProvider sets the current provider (for testing).

func (*FailoverManager) SetLastFailoverTime added in v0.18.0

func (fm *FailoverManager) SetLastFailoverTime(t time.Time)

SetLastFailoverTime sets the last failover time (for testing).

type FailoverRecord added in v0.18.0

type FailoverRecord struct {
	Timestamp    time.Time
	FromProvider string
	ToProvider   string
	Reason       string
	Duration     time.Duration
	Success      bool
}

FailoverRecord represents a failover event record.

type FailoverStats added in v0.18.0

type FailoverStats struct {
	IsActive           bool
	CurrentProvider    string
	FailoverCount      int32
	LastFailoverTime   time.Time
	HealthyProviders   []string
	UnhealthyProviders []string
	RecentFailovers    []FailoverRecord
}

FailoverStats holds failover statistics.

type HealthChecker added in v0.18.0

type HealthChecker interface {
	HealthCheck(ctx context.Context) HealthStatus
}

HealthChecker provides health check capability.

type HealthMonitor added in v0.18.0

type HealthMonitor struct {
	// contains filtered or unexported fields
}

HealthMonitor wraps a client with health monitoring capabilities.

func NewHealthMonitor added in v0.18.0

func NewHealthMonitor(client interface {
	Chat(ctx context.Context, prompt string) (string, error)
	Analyze(ctx context.Context, prompt string, target any) error
	ChatStream(ctx context.Context, prompt string) (<-chan string, error)
	HealthCheck(ctx context.Context) HealthStatus
}, checkInterval time.Duration) *HealthMonitor

NewHealthMonitor creates a health monitor wrapper.

func (*HealthMonitor) Analyze added in v0.18.0

func (h *HealthMonitor) Analyze(ctx context.Context, prompt string, target any) error

Analyze delegates to the underlying client.

func (*HealthMonitor) Chat added in v0.18.0

func (h *HealthMonitor) Chat(ctx context.Context, prompt string) (string, error)

Chat delegates to the underlying client.

func (*HealthMonitor) ChatStream added in v0.18.0

func (h *HealthMonitor) ChatStream(ctx context.Context, prompt string) (<-chan string, error)

ChatStream delegates to the underlying client.

func (*HealthMonitor) HealthCheck added in v0.18.0

func (h *HealthMonitor) HealthCheck(ctx context.Context) HealthStatus

HealthCheck performs a health check with optional caching.

func (*HealthMonitor) IsHealthy added in v0.18.0

func (h *HealthMonitor) IsHealthy() bool

IsHealthy returns true if the last health check passed.

func (*HealthMonitor) LastHealthCheck added in v0.18.0

func (h *HealthMonitor) LastHealthCheck() time.Time

LastHealthCheck returns the time of the last health check.

type HealthStatus added in v0.18.0

type HealthStatus struct {
	Healthy   bool
	Provider  string
	Model     string
	LatencyMs int64
	Error     string
}

HealthStatus represents the health status of an LLM client. Exported for use by the brain package.

type MetricsCollector added in v0.18.0

type MetricsCollector struct {
	// contains filtered or unexported fields
}

MetricsCollector collects and exports LLM metrics via OpenTelemetry.

func NewMetricsCollector added in v0.18.0

func NewMetricsCollector(config MetricsConfig) *MetricsCollector

NewMetricsCollector creates a new metrics collector.

func (*MetricsCollector) EndRequest added in v0.18.0

func (mc *MetricsCollector) EndRequest()

EndRequest decrements the active request counter.

func (*MetricsCollector) GetStats added in v0.18.0

func (mc *MetricsCollector) GetStats() MetricsStats

GetStats returns current metrics statistics.

func (*MetricsCollector) RecordRequest added in v0.18.0

func (mc *MetricsCollector) RecordRequest(
	model string,
	scenario string,
	inputTokens int64,
	outputTokens int64,
	cost float64,
	latencyMs float64,
	err error,
)

RecordRequest records a completed request with metrics.

func (*MetricsCollector) RecordRoutingDecision added in v0.18.0

func (mc *MetricsCollector) RecordRoutingDecision(scenario Scenario, strategy RouteStrategy, model string)

RecordRoutingDecision records a routing decision.

func (*MetricsCollector) StartRequest added in v0.18.0

func (mc *MetricsCollector) StartRequest()

StartRequest increments the active request counter.

type MetricsConfig added in v0.18.0

type MetricsConfig struct {
	// Enabled enables metrics collection.
	Enabled bool
	// ServiceName is the name for the metrics service.
	ServiceName string
	// MaxLatencySamples is the max number of latency samples to keep locally.
	MaxLatencySamples int
}

MetricsConfig holds configuration for metrics collection.

type MetricsStats added in v0.18.0

type MetricsStats struct {
	TotalRequests     int64
	TotalInputTokens  int64
	TotalOutputTokens int64
	TotalCost         float64
	TotalErrors       int64
	ErrorRate         float64
	AvgLatencyMs      float64
	ActiveRequests    int64
}

MetricsStats represents aggregated metrics statistics.

type ModelConfig added in v0.18.0

type ModelConfig struct {
	// Name is the model identifier (e.g., "gpt-4o-mini", "qwen-plus").
	Name string
	// Provider is the provider name (e.g., "openai", "dashscope").
	Provider string
	// APIKey is the API key for this model.
	APIKey string
	// Endpoint is the optional custom endpoint.
	Endpoint string
	// CostPer1KInput is the cost per 1K input tokens (USD).
	CostPer1KInput float64
	// CostPer1KOutput is the cost per 1K output tokens (USD).
	CostPer1KOutput float64
	// AvgLatencyMs is the average latency in milliseconds.
	AvgLatencyMs int64
	// MaxTokens is the maximum context window size.
	MaxTokens int
	// Enabled indicates if this model is available for routing.
	Enabled bool
}

ModelConfig represents configuration for a single model.

type ModelPricing added in v0.18.0

type ModelPricing struct {
	ModelName       string
	Provider        string
	CostPer1KInput  float64 // USD per 1K input tokens
	CostPer1KOutput float64 // USD per 1K output tokens
	CostPer1KCache  float64 // USD per 1K cached input tokens (if applicable)
	Currency        string
	EffectiveDate   time.Time
}

ModelPricing represents pricing for a model.

func DefaultModelPricing added in v0.18.0

func DefaultModelPricing() []ModelPricing

DefaultModelPricing returns default pricing for common models.

type ModelUsage added in v0.18.0

type ModelUsage struct {
	ModelName    string
	InputTokens  int64
	OutputTokens int64
	Cost         float64
	RequestCount int64
}

ModelUsage tracks usage for a specific model within a session.

type OpenAIClient

type OpenAIClient struct {
	// contains filtered or unexported fields
}

OpenAIClient implements OpenAI-compatible LLM interactions. It can be used for OpenAI, DeepSeek, Groq, etc.

func NewOpenAIClient

func NewOpenAIClient(apiKey, endpoint, model string, logger *slog.Logger) *OpenAIClient

NewOpenAIClient creates a new OpenAI compatible client.

func (*OpenAIClient) Analyze

func (c *OpenAIClient) Analyze(ctx context.Context, prompt string, target any) error

Analyze requests JSON formatted output from the model. It uses "ResponseFormat: {Type: JSON_OBJECT}" to ensure model compatibility for structured reasoning.

func (*OpenAIClient) Chat

func (c *OpenAIClient) Chat(ctx context.Context, prompt string) (string, error)

Chat generates a simple plain text completion.

func (*OpenAIClient) ChatStream added in v0.18.0

func (c *OpenAIClient) ChatStream(ctx context.Context, prompt string) (<-chan string, error)

ChatStream returns a channel that streams tokens as they are generated. The channel is closed when the stream completes or an error occurs.

func (*OpenAIClient) HealthCheck added in v0.18.0

func (c *OpenAIClient) HealthCheck(ctx context.Context) HealthStatus

HealthCheck performs a simple health check by making a minimal API call.

type Priority added in v0.18.0

type Priority int

Priority defines request priority levels.

const (
	// PriorityHigh - High priority (critical requests).
	PriorityHigh Priority = 1
	// PriorityMedium - Medium priority (normal requests).
	PriorityMedium Priority = 2
	// PriorityLow - Low priority (can be dropped).
	PriorityLow Priority = 3
)

func (Priority) String added in v0.18.0

func (p Priority) String() string

String returns string representation of priority.

type PriorityClient added in v0.18.0

type PriorityClient struct {
	// contains filtered or unexported fields
}

PriorityClient wraps a scheduler with execution capabilities.

func NewPriorityClient added in v0.18.0

func NewPriorityClient(scheduler *PriorityScheduler, timeout time.Duration, logger *slog.Logger) *PriorityClient

NewPriorityClient creates a new priority client.

func (*PriorityClient) ProcessAll added in v0.18.0

func (pc *PriorityClient) ProcessAll(ctx context.Context) error

ProcessAll processes all requests in the queue.

func (*PriorityClient) ProcessNext added in v0.18.0

func (pc *PriorityClient) ProcessNext(ctx context.Context) error

ProcessNext processes the next request in the queue.

func (*PriorityClient) Submit added in v0.18.0

func (pc *PriorityClient) Submit(ctx context.Context, id string, priority Priority, execute func() error) error

Submit submits a request for execution.

func (*PriorityClient) SubmitWithResult added in v0.18.0

func (pc *PriorityClient) SubmitWithResult(ctx context.Context, id string, priority Priority, execute func() error) (<-chan error, error)

SubmitWithResult submits a request and returns the result.

type PriorityConfig added in v0.18.0

type PriorityConfig struct {
	// MaxQueueSize is the maximum queue size.
	MaxQueueSize int
	// EnableLowPriorityDrop enables dropping low priority requests when queue is full.
	EnableLowPriorityDrop bool
	// HighPriorityReserve reserves queue slots for high priority.
	HighPriorityReserve int
	// MaxWaitTime is the maximum wait time for requests.
	MaxWaitTime time.Duration
	// Logger for queue events.
	Logger *slog.Logger
}

PriorityConfig holds configuration for priority queue.

func DefaultPriorityConfig added in v0.18.0

func DefaultPriorityConfig() PriorityConfig

DefaultPriorityConfig returns sensible defaults.

type PriorityQueue added in v0.18.0

type PriorityQueue struct {
	// contains filtered or unexported fields
}

PriorityQueue implements a priority queue for requests.

func (*PriorityQueue) Len added in v0.18.0

func (pq *PriorityQueue) Len() int

Len returns the number of items in the queue.

func (*PriorityQueue) Less added in v0.18.0

func (pq *PriorityQueue) Less(i, j int) bool

Less compares two items (lower priority value = higher priority).

func (*PriorityQueue) Peek added in v0.18.0

func (pq *PriorityQueue) Peek() *PriorityRequest

Peek returns the highest priority item without removing it.

func (*PriorityQueue) Pop added in v0.18.0

func (pq *PriorityQueue) Pop() interface{}

Pop removes and returns the highest priority item.

func (*PriorityQueue) Push added in v0.18.0

func (pq *PriorityQueue) Push(x interface{})

Push adds an item to the queue.

func (*PriorityQueue) Swap added in v0.18.0

func (pq *PriorityQueue) Swap(i, j int)

Swap swaps two items.

type PriorityRequest added in v0.18.0

type PriorityRequest struct {
	ID        string
	Priority  Priority
	Context   context.Context
	Execute   func() error
	CreatedAt time.Time
	Deadline  time.Time
	MaxWait   time.Duration
	// contains filtered or unexported fields
}

PriorityRequest represents a request in the priority queue.

type PriorityScheduler added in v0.18.0

type PriorityScheduler struct {
	// contains filtered or unexported fields
}

PriorityScheduler manages priority-based request scheduling.

func NewPriorityScheduler added in v0.18.0

func NewPriorityScheduler(config PriorityConfig) *PriorityScheduler

NewPriorityScheduler creates a new priority scheduler.

func (*PriorityScheduler) Clear added in v0.18.0

func (ps *PriorityScheduler) Clear()

Clear clears all items from the queue.

func (*PriorityScheduler) Dequeue added in v0.18.0

func (ps *PriorityScheduler) Dequeue(ctx context.Context) (*PriorityRequest, error)

Dequeue removes and returns the highest priority request.

func (*PriorityScheduler) Enqueue added in v0.18.0

func (ps *PriorityScheduler) Enqueue(ctx context.Context, id string, priority Priority, execute func() error, maxWait time.Duration) error

Enqueue adds a request to the priority queue.

func (*PriorityScheduler) GetStats added in v0.18.0

func (ps *PriorityScheduler) GetStats() PriorityStats

GetStats returns scheduler statistics.

func (*PriorityScheduler) Shutdown added in v0.18.0

func (ps *PriorityScheduler) Shutdown()

Shutdown gracefully shuts down the scheduler.

func (*PriorityScheduler) Size added in v0.18.0

func (ps *PriorityScheduler) Size() int

Size returns the current queue size.

func (*PriorityScheduler) TryDequeue added in v0.18.0

func (ps *PriorityScheduler) TryDequeue() *PriorityRequest

TryDequeue tries to dequeue without blocking.

type PriorityStats added in v0.18.0

type PriorityStats struct {
	QueueSize       int32
	MaxQueueSize    int
	Dropped         int64
	Processed       int64
	HighProcessed   int64
	MediumProcessed int64
	LowProcessed    int64
	HighDropped     int64
	MediumDropped   int64
	LowDropped      int64
	IsShutdown      bool
}

PriorityStats holds scheduler statistics.

type ProviderConfig added in v0.18.0

type ProviderConfig struct {
	// Name is the provider identifier (e.g., "openai", "dashscope", "anthropic").
	Name string
	// APIKey is the API key for this provider.
	APIKey string
	// Endpoint is the optional custom endpoint.
	Endpoint string
	// Models is the list of models available from this provider.
	Models []string
	// Priority is the failover priority (1 = primary, higher = backup).
	Priority int
	// Enabled indicates if this provider is available.
	Enabled bool
	// Timeout is the request timeout for this provider.
	Timeout time.Duration
	// MaxRetries is the max retry attempts for this provider.
	MaxRetries int
}

ProviderConfig represents configuration for an LLM provider.

type RateLimitConfig added in v0.18.0

type RateLimitConfig struct {
	// RequestsPerSecond is the sustained request rate.
	RequestsPerSecond float64
	// BurstSize is the maximum burst size.
	BurstSize int
	// MaxQueueSize is the maximum number of waiting requests.
	MaxQueueSize int
	// QueueTimeout is the maximum time a request can wait in queue.
	QueueTimeout time.Duration
	// PerModel enables per-model rate limiting.
	PerModel bool
}

RateLimitConfig holds configuration for rate limiting.

type RateLimitStats added in v0.18.0

type RateLimitStats struct {
	TotalRequests    int64
	QueuedRequests   int64
	RejectedRequests int64
	AvgWaitTimeMs    float64
	LastReset        time.Time
}

RateLimitStats holds rate limiting statistics.

type RateLimitedClient added in v0.18.0

type RateLimitedClient struct {
	// contains filtered or unexported fields
}

RateLimitedClient wraps a client with rate limiting.

func NewRateLimitedClient added in v0.18.0

func NewRateLimitedClient(
	client interface {
		Chat(ctx context.Context, prompt string) (string, error)
		Analyze(ctx context.Context, prompt string, target any) error
		ChatStream(ctx context.Context, prompt string) (<-chan string, error)
		HealthCheck(ctx context.Context) HealthStatus
	},
	limiter *RateLimiter,
) *RateLimitedClient

NewRateLimitedClient creates a new rate-limited client wrapper.

func (*RateLimitedClient) Analyze added in v0.18.0

func (c *RateLimitedClient) Analyze(ctx context.Context, prompt string, target any) error

Analyze implements rate-limited analyze.

func (*RateLimitedClient) Chat added in v0.18.0

func (c *RateLimitedClient) Chat(ctx context.Context, prompt string) (string, error)

Chat implements rate-limited chat.

func (*RateLimitedClient) ChatStream added in v0.18.0

func (c *RateLimitedClient) ChatStream(ctx context.Context, prompt string) (<-chan string, error)

ChatStream implements rate-limited streaming.

func (*RateLimitedClient) HealthCheck added in v0.18.0

func (c *RateLimitedClient) HealthCheck(ctx context.Context) HealthStatus

HealthCheck implements rate-limited health check.

func (*RateLimitedClient) SetModel added in v0.18.0

func (c *RateLimitedClient) SetModel(model string)

SetModel sets the model for per-model rate limiting.

type RateLimiter added in v0.18.0

type RateLimiter struct {
	// contains filtered or unexported fields
}

RateLimiter provides rate limiting with token bucket algorithm.

func NewRateLimiter added in v0.18.0

func NewRateLimiter(config RateLimitConfig) *RateLimiter

NewRateLimiter creates a new rate limiter.

func (*RateLimiter) Allow added in v0.18.0

func (rl *RateLimiter) Allow() bool

Allow checks if a request can proceed immediately.

func (*RateLimiter) Close added in v0.18.0

func (rl *RateLimiter) Close()

Close closes the rate limiter and stops queue processing.

func (*RateLimiter) GetStats added in v0.18.0

func (rl *RateLimiter) GetStats() RateLimitStats

GetStats returns current rate limiting statistics.

func (*RateLimiter) Remaining added in v0.18.0

func (rl *RateLimiter) Remaining() int

Remaining returns the number of requests remaining in the current burst.

func (*RateLimiter) Reset added in v0.18.0

func (rl *RateLimiter) Reset()

Reset immediately resets the rate limiter's token-bucket state (use ResetStats to reset statistics instead).

func (*RateLimiter) ResetStats added in v0.18.0

func (rl *RateLimiter) ResetStats()

ResetStats resets the statistics.

func (*RateLimiter) SetModelRate added in v0.18.0

func (rl *RateLimiter) SetModelRate(model string, requestsPerSecond float64, burstSize int)

SetModelRate sets rate limit for a specific model.

func (*RateLimiter) SetRate added in v0.18.0

func (rl *RateLimiter) SetRate(requestsPerSecond float64, burstSize int)

SetRate updates the rate limit dynamically.

func (*RateLimiter) TryWithRateLimit added in v0.18.0

func (rl *RateLimiter) TryWithRateLimit(ctx context.Context, model string, fn func() error) error

TryWithRateLimit attempts to execute fn with rate limiting; unlike WithRateLimit, it returns an error immediately if the request is rate limited rather than waiting.

func (*RateLimiter) Wait added in v0.18.0

func (rl *RateLimiter) Wait(ctx context.Context) error

Wait waits until a request can proceed or context is cancelled.

func (*RateLimiter) WaitModel added in v0.18.0

func (rl *RateLimiter) WaitModel(ctx context.Context, model string) error

WaitModel waits for a specific model's rate limit.

func (*RateLimiter) WithRateLimit added in v0.18.0

func (rl *RateLimiter) WithRateLimit(ctx context.Context, model string, fn func() error) error

WithRateLimit wraps a function with rate limiting.

type RequestTimer added in v0.18.0

type RequestTimer struct {
	// contains filtered or unexported fields
}

RequestTimer helps track request timing.

func NewRequestTimer added in v0.18.0

func NewRequestTimer(metrics *MetricsCollector, model, scenario string) *RequestTimer

NewRequestTimer creates a new request timer.

func (*RequestTimer) Record added in v0.18.0

func (rt *RequestTimer) Record(inputTokens, outputTokens int64, cost float64, err error)

Record completes the timer and records metrics.

type RetryClient added in v0.18.0

type RetryClient struct {
	// contains filtered or unexported fields
}

RetryClient wraps an LLM client with retry and backoff logic.

func NewRetryClient added in v0.18.0

func NewRetryClient(client interface {
	Chat(ctx context.Context, prompt string) (string, error)
	Analyze(ctx context.Context, prompt string, target any) error
	ChatStream(ctx context.Context, prompt string) (<-chan string, error)
	HealthCheck(ctx context.Context) HealthStatus
}, maxRetries, retryMinWaitMs, retryMaxWaitMs int) *RetryClient

NewRetryClient creates a new retry client wrapper. Set maxRetries to 0 to disable retries.

func (*RetryClient) Analyze added in v0.18.0

func (r *RetryClient) Analyze(ctx context.Context, prompt string, target any) error

Analyze implements the Analyze method with retry logic.

func (*RetryClient) Chat added in v0.18.0

func (r *RetryClient) Chat(ctx context.Context, prompt string) (string, error)

Chat implements the Chat method with retry logic.

func (*RetryClient) ChatStream added in v0.18.0

func (r *RetryClient) ChatStream(ctx context.Context, prompt string) (<-chan string, error)

ChatStream implements the ChatStream method with retry logic.

func (*RetryClient) HealthCheck added in v0.18.0

func (r *RetryClient) HealthCheck(ctx context.Context) HealthStatus

HealthCheck delegates to the underlying client.

type RouteStrategy added in v0.18.0

type RouteStrategy string

RouteStrategy defines the routing strategy for model selection.

const (
	// StrategyCostPriority routes to the cheapest model that meets requirements.
	StrategyCostPriority RouteStrategy = "cost_priority"
	// StrategyLatencyPriority routes to the fastest model.
	StrategyLatencyPriority RouteStrategy = "latency_priority"
	// StrategyQualityPriority routes to the highest quality model.
	StrategyQualityPriority RouteStrategy = "quality_priority"
	// StrategyBalanced routes with balanced cost/quality tradeoff.
	StrategyBalanced RouteStrategy = "balanced"
)

type Router added in v0.18.0

type Router struct {
	// contains filtered or unexported fields
}

Router provides dynamic model routing based on scenario and strategy.

func NewRouter added in v0.18.0

func NewRouter(config RouterConfig, metrics *MetricsCollector) *Router

NewRouter creates a new model router.

func (*Router) AddModel added in v0.18.0

func (r *Router) AddModel(model ModelConfig)

AddModel dynamically adds a model to the router.

func (*Router) DetectScenario added in v0.18.0

func (r *Router) DetectScenario(prompt string) Scenario

DetectScenario infers the scenario from the prompt content.

func (*Router) GetDefaultStrategy added in v0.18.0

func (r *Router) GetDefaultStrategy() RouteStrategy

GetDefaultStrategy returns the default routing strategy.

func (*Router) GetModels added in v0.18.0

func (r *Router) GetModels() []ModelConfig

GetModels returns all configured models.

func (*Router) RemoveModel added in v0.18.0

func (r *Router) RemoveModel(name string)

RemoveModel removes a model by name.

func (*Router) SelectModel added in v0.18.0

func (r *Router) SelectModel(ctx context.Context, scenario Scenario, strategy RouteStrategy) (*ModelConfig, error)

SelectModel selects the best model for the given scenario and strategy.

type RouterConfig added in v0.18.0

type RouterConfig struct {
	// DefaultStrategy is the default routing strategy.
	DefaultStrategy RouteStrategy
	// Models is the list of available models.
	Models []ModelConfig
	// ScenarioModelMap maps scenarios to preferred model names.
	ScenarioModelMap map[Scenario]string
	// FallbackModel is used when primary model fails.
	FallbackModel string
	// Logger for routing decisions.
	Logger *slog.Logger
}

RouterConfig holds the configuration for the model router.

type Scenario added in v0.18.0

type Scenario string

Scenario defines the use case scenario for routing.

const (
	// ScenarioChat is for conversational chat.
	ScenarioChat Scenario = "chat"
	// ScenarioAnalyze is for structured analysis/extraction.
	ScenarioAnalyze Scenario = "analyze"
	// ScenarioCode is for code generation/review.
	ScenarioCode Scenario = "code"
	// ScenarioReasoning is for complex reasoning tasks.
	ScenarioReasoning Scenario = "reasoning"
)

type SessionCost added in v0.18.0

type SessionCost struct {
	SessionID      string
	TotalInput     int64
	TotalOutput    int64
	TotalCost      float64
	RequestCount   int64
	FirstRequest   time.Time
	LastRequest    time.Time
	ModelBreakdown map[string]*ModelUsage
	BudgetLimit    float64 // Optional budget limit
	BudgetAlerted  bool    // Whether budget alert has been triggered
}

SessionCost tracks costs for a single session.

type TokenUsage added in v0.18.0

type TokenUsage struct {
	InputTokens  int
	OutputTokens int
	TotalTokens  int
	Model        string
	Timestamp    time.Time
}

TokenUsage represents token usage for a single request.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL