Documentation
¶
Index ¶
- Constants
- Variables
- func EstimatePromptTokens(model string, messages []provider.Message) (int, error)
- func GetEmbeddingProvider(name ProviderName, config ProviderConfig) (provider.EmbeddingProvider, error)
- func GetModelContextWindow(model string) int
- func GetProviderPriority(name ProviderName) int
- func IsNonRetryableError(err error) bool
- func IsRetryableError(err error) bool
- func RegisterEmbeddingProvider(name ProviderName, factory EmbeddingProviderFactory, priority int)
- func RegisterProvider(name ProviderName, factory ProviderFactory, priority int)
- type APIError
- type CacheConfig
- type CacheEntry
- type CacheHitError
- type CacheManager
- func (m *CacheManager) BuildCacheKey(req *provider.ChatCompletionRequest) string
- func (m *CacheManager) Config() CacheConfig
- func (m *CacheManager) Delete(ctx context.Context, req *provider.ChatCompletionRequest) error
- func (m *CacheManager) Get(ctx context.Context, req *provider.ChatCompletionRequest) (*CacheEntry, error)
- func (m *CacheManager) Set(ctx context.Context, req *provider.ChatCompletionRequest, ...) error
- func (m *CacheManager) ShouldCache(req *provider.ChatCompletionRequest) bool
- type CacheStats
- type Capabilities
- type ChatClient
- func (c *ChatClient) AppendMessage(ctx context.Context, sessionID string, message provider.Message) error
- func (c *ChatClient) Cache() *CacheManager
- func (c *ChatClient) Close() error
- func (c *ChatClient) CreateChatCompletion(ctx context.Context, req *provider.ChatCompletionRequest) (*provider.ChatCompletionResponse, error)
- func (c *ChatClient) CreateChatCompletionStream(ctx context.Context, req *provider.ChatCompletionRequest) (provider.ChatCompletionStream, error)
- func (c *ChatClient) CreateChatCompletionStreamWithMemory(ctx context.Context, sessionID string, req *provider.ChatCompletionRequest) (provider.ChatCompletionStream, error)
- func (c *ChatClient) CreateChatCompletionWithMemory(ctx context.Context, sessionID string, req *provider.ChatCompletionRequest) (*provider.ChatCompletionResponse, error)
- func (c *ChatClient) CreateConversationWithSystemMessage(ctx context.Context, sessionID, systemMessage string) error
- func (c *ChatClient) DeleteConversation(ctx context.Context, sessionID string) error
- func (c *ChatClient) GetConversationMessages(ctx context.Context, sessionID string) ([]provider.Message, error)
- func (c *ChatClient) HasCache() bool
- func (c *ChatClient) HasMemory() bool
- func (c *ChatClient) LoadConversation(ctx context.Context, sessionID string) (*ConversationMemory, error)
- func (c *ChatClient) Logger() *slog.Logger
- func (c *ChatClient) Memory() *MemoryManager
- func (c *ChatClient) Provider() provider.Provider
- func (c *ChatClient) SaveConversation(ctx context.Context, conversation *ConversationMemory) error
- func (c *ChatClient) TokenEstimator() TokenEstimator
- type ChatCompletionChoice
- type ChatCompletionChunk
- type ChatCompletionRequest
- type ChatCompletionResponse
- type ChatCompletionStream
- type CircuitBreaker
- type CircuitBreakerConfig
- type CircuitBreakerStats
- type CircuitOpenError
- type CircuitState
- type ClientConfig
- type ConversationMemory
- type EmbeddingProviderFactory
- type ErrorCategory
- type FallbackAttempt
- type FallbackError
- type FallbackProvider
- func (fp *FallbackProvider) CircuitBreaker(providerName string) *CircuitBreaker
- func (fp *FallbackProvider) Close() error
- func (fp *FallbackProvider) CreateChatCompletion(ctx context.Context, req *provider.ChatCompletionRequest) (*provider.ChatCompletionResponse, error)
- func (fp *FallbackProvider) CreateChatCompletionStream(ctx context.Context, req *provider.ChatCompletionRequest) (provider.ChatCompletionStream, error)
- func (fp *FallbackProvider) FallbackProviders() []provider.Provider
- func (fp *FallbackProvider) Name() string
- func (fp *FallbackProvider) PrimaryProvider() provider.Provider
- type FallbackProviderConfig
- type LLMCallInfo
- type MemoryConfig
- type MemoryManager
- func (m *MemoryManager) AppendMessage(ctx context.Context, sessionID string, message Message) error
- func (m *MemoryManager) AppendMessages(ctx context.Context, sessionID string, messages []Message) error
- func (m *MemoryManager) CreateConversationWithSystemMessage(ctx context.Context, sessionID, systemMessage string) error
- func (m *MemoryManager) DeleteConversation(ctx context.Context, sessionID string) error
- func (m *MemoryManager) GetMessages(ctx context.Context, sessionID string) ([]Message, error)
- func (m *MemoryManager) LoadConversation(ctx context.Context, sessionID string) (*ConversationMemory, error)
- func (m *MemoryManager) SaveConversation(ctx context.Context, conversation *ConversationMemory) error
- func (m *MemoryManager) SetMetadata(ctx context.Context, sessionID string, metadata map[string]any) error
- type Message
- type ModelInfo
- type ObservabilityHook
- type Provider
- type ProviderConfig
- type ProviderFactory
- type ProviderName
- type ResponseFormat
- type Role
- type TokenEstimator
- type TokenEstimatorConfig
- type TokenLimitError
- type TokenValidation
- type Tool
- type ToolCall
- type ToolFunction
- type ToolSpec
- type Usage
Constants ¶
const (
	EnvVarAnthropicAPIKey = "ANTHROPIC_API_KEY" // #nosec G101
	EnvVarOpenAIAPIKey    = "OPENAI_API_KEY"    // #nosec G101
	EnvVarGeminiAPIKey    = "GEMINI_API_KEY"    // #nosec G101
	EnvVarXAIAPIKey       = "XAI_API_KEY"       // #nosec G101
	EnvVarKimiAPIKey      = "KIMI_API_KEY"      // #nosec G101
	EnvVarGLMAPIKey       = "GLM_API_KEY"       // #nosec G101
	EnvVarQwenAPIKey      = "QWEN_API_KEY"      // #nosec G101
)
const (
	// Bedrock Models - Re-exported from models package
	ModelBedrockClaude3Opus   = models.BedrockClaude3Opus
	ModelBedrockClaude3Sonnet = models.BedrockClaude3Sonnet
	ModelBedrockClaudeOpus4   = models.BedrockClaudeOpus4
	ModelBedrockTitan         = models.BedrockTitan

	// Claude Models - Re-exported from models package
	ModelClaudeOpus4_1   = models.ClaudeOpus4_1
	ModelClaudeOpus4     = models.ClaudeOpus4
	ModelClaudeSonnet4   = models.ClaudeSonnet4
	ModelClaude3_7Sonnet = models.Claude3_7Sonnet
	ModelClaude3_5Haiku  = models.Claude3_5Haiku
	ModelClaude3Opus     = models.Claude3Opus
	ModelClaude3Sonnet   = models.Claude3Sonnet
	ModelClaude3Haiku    = models.Claude3Haiku

	// Gemini Models - Re-exported from models package
	ModelGemini2_5Pro       = models.Gemini2_5Pro
	ModelGemini2_5Flash     = models.Gemini2_5Flash
	ModelGeminiLive2_5Flash = models.GeminiLive2_5Flash
	ModelGemini1_5Pro       = models.Gemini1_5Pro
	ModelGemini1_5Flash     = models.Gemini1_5Flash
	ModelGeminiPro          = models.GeminiPro

	// Ollama Models - Re-exported from models package
	ModelOllamaLlama3_8B   = models.OllamaLlama3_8B
	ModelOllamaLlama3_70B  = models.OllamaLlama3_70B
	ModelOllamaMistral7B   = models.OllamaMistral7B
	ModelOllamaMixtral8x7B = models.OllamaMixtral8x7B
	ModelOllamaCodeLlama   = models.OllamaCodeLlama
	ModelOllamaGemma2B     = models.OllamaGemma2B
	ModelOllamaGemma7B     = models.OllamaGemma7B
	ModelOllamaQwen2_5     = models.OllamaQwen2_5
	ModelOllamaDeepSeek    = models.OllamaDeepSeek

	// OpenAI Models - Re-exported from models package
	ModelGPT5           = models.GPT5
	ModelGPT5Mini       = models.GPT5Mini
	ModelGPT5Nano       = models.GPT5Nano
	ModelGPT5ChatLatest = models.GPT5ChatLatest
	ModelGPT4_1         = models.GPT4_1
	ModelGPT4_1Mini     = models.GPT4_1Mini
	ModelGPT4_1Nano     = models.GPT4_1Nano
	ModelGPT4o          = models.GPT4o
	ModelGPT4oMini      = models.GPT4oMini
	ModelGPT4Turbo      = models.GPT4Turbo
	ModelGPT35Turbo     = models.GPT35Turbo

	// Vertex AI Models - Re-exported from models package
	ModelVertexClaudeOpus4 = models.VertexClaudeOpus4

	// X.AI Grok Models - Re-exported from models package
	// Grok 4.1 (Latest - November 2025)
	ModelGrok4_1FastReasoning    = models.Grok4_1FastReasoning
	ModelGrok4_1FastNonReasoning = models.Grok4_1FastNonReasoning
	// Grok 4 (July 2025)
	ModelGrok4_0709            = models.Grok4_0709
	ModelGrok4FastReasoning    = models.Grok4FastReasoning
	ModelGrok4FastNonReasoning = models.Grok4FastNonReasoning
	ModelGrokCodeFast1         = models.GrokCodeFast1
	// Grok 3
	ModelGrok3     = models.Grok3
	ModelGrok3Mini = models.Grok3Mini
	// Grok 2
	ModelGrok2_1212   = models.Grok2_1212
	ModelGrok2_Vision = models.Grok2_Vision
	// Deprecated models
	ModelGrokBeta   = models.GrokBeta
	ModelGrokVision = models.GrokVision

	// Kimi / Moonshot AI Models - Re-exported from models package
	ModelKimiK2_5        = models.KimiK2_5
	ModelKimiK2Turbo     = models.KimiK2Turbo
	ModelKimiK2Thinking  = models.KimiK2Thinking
	ModelMoonshotV1_8K   = models.MoonshotV1_8K
	ModelMoonshotV1_32K  = models.MoonshotV1_32K
	ModelMoonshotV1_128K = models.MoonshotV1_128K

	// Zhipu AI GLM Models - Re-exported from models package
	ModelGLM5         = models.GLM5
	ModelGLM4_7       = models.GLM4_7
	ModelGLM4_7FlashX = models.GLM4_7FlashX
	ModelGLM4_7Flash  = models.GLM4_7Flash
	ModelGLM4_5       = models.GLM4_5
	ModelGLM4_5Flash  = models.GLM4_5Flash

	// Alibaba Cloud Qwen Models - Re-exported from models package
	ModelQwen3Max    = models.Qwen3Max
	ModelQwenMax     = models.QwenMax
	ModelQwenPlus    = models.QwenPlus
	ModelQwenFlash   = models.QwenFlash
	ModelQwQ32B      = models.QwQ32B
	ModelQwen3_235B  = models.Qwen3_235B
	ModelQwen3_32B   = models.Qwen3_32B
	ModelQwen2_5_72B = models.Qwen2_5_72B
)
Common model constants for each provider.
NOTE: For new code, prefer importing "github.com/plexusone/omnillm-core/models" directly for better organization and documentation. These constants are maintained for backwards compatibility with existing code.
const (
	// PriorityThin is the priority for thin (stdlib-only) provider implementations.
	PriorityThin = 0

	// PriorityThick is the priority for thick (official SDK) provider implementations.
	PriorityThick = 10
)
Priority constants for provider registration.
const (
	RoleSystem    = provider.RoleSystem
	RoleUser      = provider.RoleUser
	RoleAssistant = provider.RoleAssistant
	RoleTool      = provider.RoleTool
)
Role constants, re-exported from the provider package for convenience.
Variables ¶
var (
	// Common errors
	ErrUnsupportedProvider  = errors.New("unsupported provider")
	ErrBedrockExternal      = errors.New("bedrock provider moved to github.com/plexusone/omnillm-bedrock; use CustomProvider to inject it")
	ErrInvalidConfiguration = errors.New("invalid configuration")
	ErrNoProviders          = errors.New("at least one provider must be configured")
	ErrEmptyAPIKey          = errors.New("API key cannot be empty")
	ErrEmptyModel           = errors.New("model cannot be empty")
	ErrEmptyMessages        = errors.New("messages cannot be empty")
	ErrStreamClosed         = errors.New("stream is closed")
	ErrInvalidResponse      = errors.New("invalid response format")
	ErrRateLimitExceeded    = errors.New("rate limit exceeded")
	ErrQuotaExceeded        = errors.New("quota exceeded")
	ErrInvalidRequest       = errors.New("invalid request")
	ErrModelNotFound        = errors.New("model not found")
	ErrServerError          = errors.New("server error")
	ErrNetworkError         = errors.New("network error")

	// Aliases for thick provider compatibility
	ErrInvalidAPIKey = ErrEmptyAPIKey
)
Functions ¶
func EstimatePromptTokens ¶
EstimatePromptTokens is a convenience function that creates a default estimator and estimates tokens for a set of messages.
func GetEmbeddingProvider ¶ added in v0.16.0
func GetEmbeddingProvider(name ProviderName, config ProviderConfig) (provider.EmbeddingProvider, error)
GetEmbeddingProvider creates an embedding provider instance from the registry. Returns an error if the provider is not registered or if creation fails.
func GetModelContextWindow ¶
GetModelContextWindow is a convenience function that returns the context window for a model using the default estimator.
func GetProviderPriority ¶
func GetProviderPriority(name ProviderName) int
GetProviderPriority returns the priority of the registered provider. Returns -1 if the provider is not registered.
func IsNonRetryableError ¶
IsNonRetryableError returns true if the error is permanent and retrying won't help.
func IsRetryableError ¶
IsRetryableError returns true if the error is transient and the request can be retried. This is useful for fallback provider logic - only retry on retryable errors.
func RegisterEmbeddingProvider ¶ added in v0.16.0
func RegisterEmbeddingProvider(name ProviderName, factory EmbeddingProviderFactory, priority int)
RegisterEmbeddingProvider registers an embedding provider factory with the given name and priority. Higher priority values override lower priority registrations. Thin (stdlib) providers should use priority 0. Thick (SDK) providers should use priority 10.
func RegisterProvider ¶
func RegisterProvider(name ProviderName, factory ProviderFactory, priority int)
RegisterProvider registers a provider factory with the given name and priority. Higher priority values override lower priority registrations. Thin (stdlib) providers should use priority 0. Thick (SDK) providers should use priority 10.
Example:
// In omnillm-core/providers/openai/init.go (thin, priority 0)
func init() {
omnillm.RegisterProvider(omnillm.ProviderNameOpenAI, NewProvider, 0)
}
// In omnillm-openai/init.go (thick, priority 10)
func init() {
omnillm.RegisterProvider(omnillm.ProviderNameOpenAI, NewProvider, 10)
}
Types ¶
type APIError ¶
type APIError struct {
StatusCode int `json:"status_code"`
Message string `json:"message"`
Type string `json:"type"`
Code string `json:"code"`
Provider ProviderName `json:"provider"`
}
APIError represents an error response from the API
func NewAPIError ¶
NewAPIError creates a new API error. This signature is compatible with thick providers that pass (provider, statusCode, errorType, message).
func NewAPIErrorFull ¶
func NewAPIErrorFull(provider ProviderName, statusCode int, message, errorType, code string) *APIError
NewAPIErrorFull creates a new API error with all fields.
type CacheConfig ¶
type CacheConfig struct {
// TTL is the time-to-live for cached responses.
// Default: 1 hour
TTL time.Duration
// KeyPrefix is the prefix for cache keys in the KVS.
// Default: "omnillm:cache"
KeyPrefix string
// SkipStreaming skips caching for streaming requests.
// Default: true (streaming responses are not cached)
SkipStreaming bool
// CacheableModels limits caching to specific models.
// If nil or empty, all models are cached.
CacheableModels []string
// ExcludeParameters lists parameters to exclude from cache key calculation.
// Common exclusions: "user" (user ID shouldn't affect cache)
// Default: ["user"]
ExcludeParameters []string
// IncludeTemperature includes temperature in cache key.
// Set to false if you want to cache regardless of temperature setting.
// Default: true
IncludeTemperature bool
// IncludeSeed includes seed in cache key.
// Default: true
IncludeSeed bool
}
CacheConfig configures response caching behavior
func DefaultCacheConfig ¶
func DefaultCacheConfig() CacheConfig
DefaultCacheConfig returns a CacheConfig with sensible defaults
type CacheEntry ¶
type CacheEntry struct {
// Response is the cached chat completion response
Response *provider.ChatCompletionResponse `json:"response"`
// CachedAt is when the response was cached
CachedAt time.Time `json:"cached_at"`
// ExpiresAt is when the cache entry expires
ExpiresAt time.Time `json:"expires_at"`
// Model is the model used for the request
Model string `json:"model"`
// RequestHash is the hash of the request (for verification)
RequestHash string `json:"request_hash"`
}
CacheEntry represents a cached response with metadata
func (*CacheEntry) IsExpired ¶
func (e *CacheEntry) IsExpired() bool
IsExpired returns true if the cache entry has expired
type CacheHitError ¶
type CacheHitError struct {
Entry *CacheEntry
}
CacheHitError is a marker type to indicate a cache hit (not an actual error)
func (*CacheHitError) Error ¶
func (e *CacheHitError) Error() string
type CacheManager ¶
type CacheManager struct {
// contains filtered or unexported fields
}
CacheManager handles response caching using a KVS backend
func NewCacheManager ¶
func NewCacheManager(kvsClient kvs.Client, config CacheConfig) *CacheManager
NewCacheManager creates a new cache manager with the given KVS client and configuration. If config has zero values, defaults are used for those fields.
func (*CacheManager) BuildCacheKey ¶
func (m *CacheManager) BuildCacheKey(req *provider.ChatCompletionRequest) string
BuildCacheKey generates a deterministic cache key for a request. The key is a hash of the normalized request parameters.
func (*CacheManager) Config ¶
func (m *CacheManager) Config() CacheConfig
Config returns the cache configuration
func (*CacheManager) Delete ¶
func (m *CacheManager) Delete(ctx context.Context, req *provider.ChatCompletionRequest) error
Delete removes a cache entry for the given request.
func (*CacheManager) Get ¶
func (m *CacheManager) Get(ctx context.Context, req *provider.ChatCompletionRequest) (*CacheEntry, error)
Get retrieves a cached response for the given request. Returns nil if no valid cache entry exists.
func (*CacheManager) Set ¶
func (m *CacheManager) Set(ctx context.Context, req *provider.ChatCompletionRequest, resp *provider.ChatCompletionResponse) error
Set stores a response in the cache for the given request.
func (*CacheManager) ShouldCache ¶
func (m *CacheManager) ShouldCache(req *provider.ChatCompletionRequest) bool
ShouldCache determines if a request should be cached. Returns false for streaming requests (if configured), non-cacheable models, etc.
type CacheStats ¶
CacheStats contains statistics about cache usage
type Capabilities ¶
type Capabilities struct {
// Tools indicates support for tool/function calling.
Tools bool
// Streaming indicates support for streaming responses.
Streaming bool
// Vision indicates support for image inputs in messages.
Vision bool
// JSON indicates support for JSON response format.
JSON bool
// SystemRole indicates support for system messages.
SystemRole bool
// MaxContextWindow is the maximum context window size in tokens.
MaxContextWindow int
// SupportsMaxTokens indicates if the provider supports the max_tokens parameter.
SupportsMaxTokens bool
}
Capabilities describes the features supported by a provider. Thick providers can implement a Capabilities() method returning this struct. Note: This is not part of the Provider interface but is useful for feature detection.
type ChatClient ¶
type ChatClient struct {
// contains filtered or unexported fields
}
ChatClient is the main client type; it wraps a Provider.
func NewClient ¶
func NewClient(config ClientConfig) (*ChatClient, error)
NewClient creates a new ChatClient from the given ClientConfig.
func (*ChatClient) AppendMessage ¶
func (c *ChatClient) AppendMessage(ctx context.Context, sessionID string, message provider.Message) error
AppendMessage appends a message to a conversation in memory
func (*ChatClient) Cache ¶
func (c *ChatClient) Cache() *CacheManager
Cache returns the cache manager (nil if not configured)
func (*ChatClient) CreateChatCompletion ¶
func (c *ChatClient) CreateChatCompletion(ctx context.Context, req *provider.ChatCompletionRequest) (*provider.ChatCompletionResponse, error)
CreateChatCompletion creates a chat completion
func (*ChatClient) CreateChatCompletionStream ¶
func (c *ChatClient) CreateChatCompletionStream(ctx context.Context, req *provider.ChatCompletionRequest) (provider.ChatCompletionStream, error)
CreateChatCompletionStream creates a streaming chat completion
func (*ChatClient) CreateChatCompletionStreamWithMemory ¶
func (c *ChatClient) CreateChatCompletionStreamWithMemory(ctx context.Context, sessionID string, req *provider.ChatCompletionRequest) (provider.ChatCompletionStream, error)
CreateChatCompletionStreamWithMemory creates a streaming chat completion using conversation memory
func (*ChatClient) CreateChatCompletionWithMemory ¶
func (c *ChatClient) CreateChatCompletionWithMemory(ctx context.Context, sessionID string, req *provider.ChatCompletionRequest) (*provider.ChatCompletionResponse, error)
CreateChatCompletionWithMemory creates a chat completion using conversation memory
func (*ChatClient) CreateConversationWithSystemMessage ¶
func (c *ChatClient) CreateConversationWithSystemMessage(ctx context.Context, sessionID, systemMessage string) error
CreateConversationWithSystemMessage creates a new conversation with a system message
func (*ChatClient) DeleteConversation ¶
func (c *ChatClient) DeleteConversation(ctx context.Context, sessionID string) error
DeleteConversation removes a conversation from memory
func (*ChatClient) GetConversationMessages ¶
func (c *ChatClient) GetConversationMessages(ctx context.Context, sessionID string) ([]provider.Message, error)
GetConversationMessages retrieves messages from a conversation
func (*ChatClient) HasCache ¶
func (c *ChatClient) HasCache() bool
HasCache returns true if caching is configured
func (*ChatClient) HasMemory ¶
func (c *ChatClient) HasMemory() bool
HasMemory returns true if memory is configured
func (*ChatClient) LoadConversation ¶
func (c *ChatClient) LoadConversation(ctx context.Context, sessionID string) (*ConversationMemory, error)
LoadConversation loads a conversation from memory
func (*ChatClient) Logger ¶
func (c *ChatClient) Logger() *slog.Logger
Logger returns the client's logger
func (*ChatClient) Memory ¶
func (c *ChatClient) Memory() *MemoryManager
Memory returns the memory manager (nil if not configured)
func (*ChatClient) Provider ¶
func (c *ChatClient) Provider() provider.Provider
Provider returns the underlying provider
func (*ChatClient) SaveConversation ¶
func (c *ChatClient) SaveConversation(ctx context.Context, conversation *ConversationMemory) error
SaveConversation saves a conversation to memory
func (*ChatClient) TokenEstimator ¶
func (c *ChatClient) TokenEstimator() TokenEstimator
TokenEstimator returns the token estimator (nil if not configured)
type ChatCompletionChoice ¶
type ChatCompletionChoice = provider.ChatCompletionChoice
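ChatCompletionChoice is a type alias for provider.ChatCompletionChoice, provided for backward compatibility and convenience. Aliases like this one allow thick providers to import these types from the omnillm-core root package. Note: Provider and ChatCompletionStream are defined in provider.go.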
type ChatCompletionChunk ¶
type ChatCompletionChunk = provider.ChatCompletionChunk
ChatCompletionChunk is a type alias for provider.ChatCompletionChunk, provided for backward compatibility and convenience. Aliases like this one allow thick providers to import these types from the omnillm-core root package. Note: Provider and ChatCompletionStream are defined in provider.go.
type ChatCompletionRequest ¶
type ChatCompletionRequest = provider.ChatCompletionRequest
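ChatCompletionRequest is a type alias for provider.ChatCompletionRequest, one of the request/response types re-exported by this package.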
type ChatCompletionResponse ¶
type ChatCompletionResponse = provider.ChatCompletionResponse
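ChatCompletionResponse is a type alias for provider.ChatCompletionResponse, provided for backward compatibility and convenience. Aliases like this one allow thick providers to import these types from the omnillm-core root package. Note: Provider and ChatCompletionStream are defined in provider.go.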
type ChatCompletionStream ¶
type ChatCompletionStream = provider.ChatCompletionStream
ChatCompletionStream is an alias for the provider.ChatCompletionStream interface, kept for backward compatibility.
type CircuitBreaker ¶
type CircuitBreaker struct {
// contains filtered or unexported fields
}
CircuitBreaker implements the circuit breaker pattern for provider health tracking
func NewCircuitBreaker ¶
func NewCircuitBreaker(config CircuitBreakerConfig) *CircuitBreaker
NewCircuitBreaker creates a new circuit breaker with the given configuration. If config has zero values, defaults are used for those fields.
func (*CircuitBreaker) AllowRequest ¶
func (cb *CircuitBreaker) AllowRequest() bool
AllowRequest returns true if the request should be allowed to proceed. In closed state, always allows. In open state, allows only after timeout. In half-open state, allows a limited number of test requests.
func (*CircuitBreaker) RecordFailure ¶
func (cb *CircuitBreaker) RecordFailure()
RecordFailure records a failed request. May open the circuit if thresholds are exceeded.
func (*CircuitBreaker) RecordSuccess ¶
func (cb *CircuitBreaker) RecordSuccess()
RecordSuccess records a successful request. In half-open state, may close the circuit if enough successes.
func (*CircuitBreaker) Reset ¶
func (cb *CircuitBreaker) Reset()
Reset resets the circuit breaker to closed state with cleared counters
func (*CircuitBreaker) State ¶
func (cb *CircuitBreaker) State() CircuitState
State returns the current state of the circuit breaker
func (*CircuitBreaker) Stats ¶
func (cb *CircuitBreaker) Stats() CircuitBreakerStats
Stats returns current statistics for monitoring
type CircuitBreakerConfig ¶
type CircuitBreakerConfig struct {
// FailureThreshold is the number of consecutive failures before opening the circuit.
// Default: 5
FailureThreshold int
// SuccessThreshold is the number of consecutive successes in half-open state
// required to close the circuit.
// Default: 2
SuccessThreshold int
// Timeout is how long to wait in open state before transitioning to half-open.
// Default: 30 seconds
Timeout time.Duration
// FailureRateThreshold triggers circuit open when the failure rate exceeds this value (0-1).
// Only evaluated after MinimumRequests is reached.
// Default: 0.5 (50%)
FailureRateThreshold float64
// MinimumRequests is the minimum number of requests before failure rate is evaluated.
// Default: 10
MinimumRequests int
}
CircuitBreakerConfig configures circuit breaker behavior
func DefaultCircuitBreakerConfig ¶
func DefaultCircuitBreakerConfig() CircuitBreakerConfig
DefaultCircuitBreakerConfig returns a CircuitBreakerConfig with sensible defaults
type CircuitBreakerStats ¶
type CircuitBreakerStats struct {
State CircuitState
ConsecutiveFailures int
ConsecutiveSuccesses int
TotalRequests int
TotalFailures int
FailureRate float64
LastFailure time.Time
LastStateChange time.Time
}
CircuitBreakerStats contains statistics about the circuit breaker
type CircuitOpenError ¶
type CircuitOpenError struct {
Provider string
State CircuitState
LastFailure time.Time
RetryAfter time.Duration
}
CircuitOpenError is returned when a request is rejected due to open circuit
func (*CircuitOpenError) Error ¶
func (e *CircuitOpenError) Error() string
type CircuitState ¶
type CircuitState int
CircuitState represents the state of a circuit breaker
const (
	// CircuitClosed indicates normal operation - requests pass through
	CircuitClosed CircuitState = iota
	// CircuitOpen indicates the circuit is open - requests fail fast
	CircuitOpen
	// CircuitHalfOpen indicates the circuit is testing recovery
	CircuitHalfOpen
)
func (CircuitState) String ¶
func (s CircuitState) String() string
String returns the string representation of the circuit state
type ClientConfig ¶
type ClientConfig struct {
// Providers is an ordered list of providers. Index 0 is the primary provider,
// and indices 1+ are fallback providers tried in order on retryable errors.
// This is the preferred way to configure providers.
//
// Example:
// Providers: []ProviderConfig{
// {Provider: ProviderNameOpenAI, APIKey: "openai-key"}, // Primary
// {Provider: ProviderNameAnthropic, APIKey: "anthropic-key"}, // Fallback 1
// {Provider: ProviderNameGemini, APIKey: "gemini-key"}, // Fallback 2
// }
//
// For custom providers, use CustomProvider field in ProviderConfig:
// Providers: []ProviderConfig{
// {CustomProvider: myCustomProvider},
// }
Providers []ProviderConfig
// CircuitBreakerConfig configures circuit breaker behavior for fallback providers.
// If nil (default), circuit breaker is disabled.
// When enabled, providers that fail repeatedly are temporarily skipped.
CircuitBreakerConfig *CircuitBreakerConfig
// Memory configuration (optional)
Memory kvs.Client
MemoryConfig *MemoryConfig
// ObservabilityHook is called before/after LLM calls (optional)
ObservabilityHook ObservabilityHook
// Logger for internal logging (optional, defaults to null logger)
Logger *slog.Logger
// TokenEstimator enables pre-flight token estimation (optional).
// Use NewTokenEstimator() to create one with custom configuration.
TokenEstimator TokenEstimator
// ValidateTokens enables automatic token validation before requests.
// When true and TokenEstimator is set, requests that would exceed
// the model's context window are rejected with TokenLimitError.
// Default: false
ValidateTokens bool
// Cache is the KVS client for response caching (optional).
// If provided, identical requests will return cached responses.
// Uses the same kvs.Client interface as Memory.
Cache kvs.Client
// CacheConfig configures response caching behavior.
// If nil, DefaultCacheConfig() is used when Cache is provided.
CacheConfig *CacheConfig
}
ClientConfig holds configuration for creating a client
type ConversationMemory ¶
type ConversationMemory struct {
SessionID string `json:"session_id"`
Messages []Message `json:"messages"`
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
Metadata map[string]any `json:"metadata,omitempty"`
}
ConversationMemory represents stored conversation data
type EmbeddingProviderFactory ¶ added in v0.16.0
type EmbeddingProviderFactory func(config ProviderConfig) (provider.EmbeddingProvider, error)
EmbeddingProviderFactory is a function that creates an embedding provider from config.
func GetEmbeddingProviderFactory ¶ added in v0.16.0
func GetEmbeddingProviderFactory(name ProviderName) EmbeddingProviderFactory
GetEmbeddingProviderFactory returns the registered factory for the given provider name. Returns nil if no embedding provider is registered with that name.
type ErrorCategory ¶
type ErrorCategory int
ErrorCategory classifies errors for retry/fallback logic
const (
	// ErrorCategoryUnknown indicates the error type could not be determined
	ErrorCategoryUnknown ErrorCategory = iota
	// ErrorCategoryRetryable indicates the error is transient and the request can be retried
	// Examples: rate limits (429), server errors (5xx), network errors
	ErrorCategoryRetryable
	// ErrorCategoryNonRetryable indicates the error is permanent and retrying won't help
	// Examples: auth errors (401/403), invalid requests (400), not found (404)
	ErrorCategoryNonRetryable
)
func ClassifyError ¶
func ClassifyError(err error) ErrorCategory
ClassifyError determines the category of an error for retry/fallback decisions
func (ErrorCategory) String ¶
func (c ErrorCategory) String() string
String returns the string representation of the error category
type FallbackAttempt ¶
type FallbackAttempt struct {
// Provider is the name of the provider that was tried
Provider string
// Error is the error returned, or nil on success
Error error
// Duration is how long the attempt took
Duration time.Duration
// Skipped indicates the provider was skipped (e.g., circuit open)
Skipped bool
}
FallbackAttempt records information about a single fallback attempt
type FallbackError ¶
type FallbackError struct {
// Attempts contains information about each provider attempt
Attempts []FallbackAttempt
// LastError is the last error encountered
LastError error
}
FallbackError is returned when all providers fail
func (*FallbackError) Error ¶
func (e *FallbackError) Error() string
func (*FallbackError) Unwrap ¶
func (e *FallbackError) Unwrap() error
type FallbackProvider ¶
type FallbackProvider struct {
// contains filtered or unexported fields
}
FallbackProvider wraps multiple providers with fallback logic. It implements provider.Provider and tries providers in order until one succeeds.
func NewFallbackProvider ¶
func NewFallbackProvider( primary provider.Provider, fallbacks []provider.Provider, config *FallbackProviderConfig, ) *FallbackProvider
NewFallbackProvider creates a provider that tries fallbacks on failure. The primary provider is tried first, then fallbacks in order.
func (*FallbackProvider) CircuitBreaker ¶
func (fp *FallbackProvider) CircuitBreaker(providerName string) *CircuitBreaker
CircuitBreaker returns the circuit breaker for a provider, or nil if not configured
func (*FallbackProvider) Close ¶
func (fp *FallbackProvider) Close() error
Close closes all providers
func (*FallbackProvider) CreateChatCompletion ¶
func (fp *FallbackProvider) CreateChatCompletion( ctx context.Context, req *provider.ChatCompletionRequest, ) (*provider.ChatCompletionResponse, error)
CreateChatCompletion tries the primary provider first, then fallbacks on retryable errors.
func (*FallbackProvider) CreateChatCompletionStream ¶
func (fp *FallbackProvider) CreateChatCompletionStream(
	ctx context.Context,
	req *provider.ChatCompletionRequest,
) (provider.ChatCompletionStream, error)
CreateChatCompletionStream tries the primary provider first, then fallbacks on retryable errors.
func (*FallbackProvider) FallbackProviders ¶
func (fp *FallbackProvider) FallbackProviders() []provider.Provider
FallbackProviders returns the fallback providers
func (*FallbackProvider) Name ¶
func (fp *FallbackProvider) Name() string
Name returns a composite name indicating fallback configuration
func (*FallbackProvider) PrimaryProvider ¶
func (fp *FallbackProvider) PrimaryProvider() provider.Provider
PrimaryProvider returns the primary provider
type FallbackProviderConfig ¶
type FallbackProviderConfig struct {
// CircuitBreakerConfig configures circuit breaker behavior.
// If nil, circuit breaker is disabled.
CircuitBreakerConfig *CircuitBreakerConfig
// Logger for logging fallback events
Logger *slog.Logger
}
FallbackProviderConfig configures the fallback provider behavior
type LLMCallInfo ¶
type LLMCallInfo struct {
CallID string // Unique identifier for correlating BeforeRequest/AfterResponse
ProviderName string // e.g., "openai", "anthropic"
StartTime time.Time // When the call started
}
LLMCallInfo provides metadata about the LLM call for observability
type MemoryConfig ¶
type MemoryConfig struct {
// MaxMessages limits the number of messages to keep in memory per session
MaxMessages int
// TTL sets the time-to-live for stored conversations (0 for no expiration)
TTL time.Duration
// KeyPrefix allows customizing the key prefix for stored conversations
KeyPrefix string
}
MemoryConfig holds configuration for conversation memory
func DefaultMemoryConfig ¶
func DefaultMemoryConfig() MemoryConfig
DefaultMemoryConfig returns sensible defaults for memory configuration
type MemoryManager ¶
type MemoryManager struct {
// contains filtered or unexported fields
}
MemoryManager handles conversation persistence using KVS
func NewMemoryManager ¶
func NewMemoryManager(kvsClient kvs.Client, config MemoryConfig) *MemoryManager
NewMemoryManager creates a new memory manager with the given KVS client and config
func (*MemoryManager) AppendMessage ¶
AppendMessage adds a message to the conversation and saves it
func (*MemoryManager) AppendMessages ¶
func (m *MemoryManager) AppendMessages(ctx context.Context, sessionID string, messages []Message) error
AppendMessages adds multiple messages to the conversation and saves it
func (*MemoryManager) CreateConversationWithSystemMessage ¶
func (m *MemoryManager) CreateConversationWithSystemMessage(ctx context.Context, sessionID, systemMessage string) error
CreateConversationWithSystemMessage creates a new conversation with a system message
func (*MemoryManager) DeleteConversation ¶
func (m *MemoryManager) DeleteConversation(ctx context.Context, sessionID string) error
DeleteConversation removes a conversation from memory
func (*MemoryManager) GetMessages ¶
GetMessages returns just the messages from a conversation
func (*MemoryManager) LoadConversation ¶
func (m *MemoryManager) LoadConversation(ctx context.Context, sessionID string) (*ConversationMemory, error)
LoadConversation retrieves a conversation from memory
func (*MemoryManager) SaveConversation ¶
func (m *MemoryManager) SaveConversation(ctx context.Context, conversation *ConversationMemory) error
SaveConversation stores a conversation in memory
func (*MemoryManager) SetMetadata ¶
func (m *MemoryManager) SetMetadata(ctx context.Context, sessionID string, metadata map[string]any) error
SetMetadata sets metadata for a conversation
type Message ¶
type Message = provider.Message
Type aliases for backward compatibility and convenience. These allow thick providers to import from omnillm-core root package. Note: Provider and ChatCompletionStream are defined in provider.go
type ModelInfo ¶
type ModelInfo struct {
ID string `json:"id"`
Provider ProviderName `json:"provider"`
Name string `json:"name"`
MaxTokens int `json:"max_tokens"`
}
ModelInfo represents information about a model
func GetModelInfo ¶
GetModelInfo returns model information
type ObservabilityHook ¶
type ObservabilityHook interface {
// BeforeRequest is called before each LLM call.
// Returns a new context for trace/span propagation.
// The hook should not modify the request.
BeforeRequest(ctx context.Context, info LLMCallInfo, req *provider.ChatCompletionRequest) context.Context
// AfterResponse is called after each LLM call completes.
// This is called for both successful and failed requests.
AfterResponse(ctx context.Context, info LLMCallInfo, req *provider.ChatCompletionRequest, resp *provider.ChatCompletionResponse, err error)
// WrapStream wraps a stream for observability.
// This allows the hook to observe streaming responses.
// The returned stream must implement the same interface as the input.
//
// Note: For streaming, AfterResponse is only called if stream creation fails.
// To track streaming completion timing and content, the wrapper returned here
// should handle Close() or detect EOF in Recv() to finalize metrics/traces.
WrapStream(ctx context.Context, info LLMCallInfo, req *provider.ChatCompletionRequest, stream provider.ChatCompletionStream) provider.ChatCompletionStream
}
ObservabilityHook allows external packages to observe LLM calls. Implementations can use this to add tracing, logging, or metrics without modifying the core OmniLLM library.
type ProviderConfig ¶
type ProviderConfig struct {
// Provider is the provider type (e.g., ProviderNameOpenAI).
// Ignored if CustomProvider is set.
Provider ProviderName
// APIKey is the API key for the provider
APIKey string //nolint:gosec // G117: config field for API key, not a hardcoded credential
// BaseURL is an optional custom base URL
BaseURL string
// Region is for providers that require a region (e.g., AWS Bedrock)
Region string
// Timeout sets the HTTP client timeout for this provider
Timeout time.Duration
// HTTPClient is an optional custom HTTP client
HTTPClient *http.Client
// Extra holds provider-specific configuration
Extra map[string]any
// CustomProvider allows injecting a custom provider implementation.
// When set, Provider, APIKey, BaseURL, etc. are ignored.
CustomProvider provider.Provider
}
ProviderConfig holds configuration for a single provider instance. Used in the Providers slice where index 0 is primary and 1+ are fallbacks.
type ProviderFactory ¶
type ProviderFactory func(config ProviderConfig) (provider.Provider, error)
ProviderFactory is a function that creates a provider from config.
func GetProviderFactory ¶
func GetProviderFactory(name ProviderName) ProviderFactory
GetProviderFactory returns the registered factory for the given provider name. Returns nil if no provider is registered with that name.
type ProviderName ¶
type ProviderName string
ProviderName represents the different LLM provider names
const (
	ProviderNameOpenAI    ProviderName = "openai"
	ProviderNameAnthropic ProviderName = "anthropic"
	ProviderNameBedrock   ProviderName = "bedrock"
	ProviderNameOllama    ProviderName = "ollama"
	ProviderNameGemini    ProviderName = "gemini"
	ProviderNameXAI       ProviderName = "xai"
	ProviderNameKimi      ProviderName = "kimi"
	ProviderNameGLM       ProviderName = "glm"
	ProviderNameQwen      ProviderName = "qwen"
)
func ListEmbeddingProviders ¶ added in v0.16.0
func ListEmbeddingProviders() []ProviderName
ListEmbeddingProviders returns a list of all registered embedding provider names.
func ListRegisteredProviders ¶
func ListRegisteredProviders() []ProviderName
ListRegisteredProviders returns a list of all registered provider names.
type TokenEstimator ¶
type TokenEstimator interface {
// EstimateTokens estimates the token count for a set of messages.
// The estimate may not be exact but should be reasonably close.
EstimateTokens(model string, messages []provider.Message) (int, error)
// GetContextWindow returns the maximum context window size for a model.
// Returns 0 if the model is unknown.
GetContextWindow(model string) int
}
TokenEstimator estimates token counts for messages before sending to the API. This is useful for validating requests won't exceed model limits.
func NewTokenEstimator ¶
func NewTokenEstimator(config TokenEstimatorConfig) TokenEstimator
NewTokenEstimator creates a new token estimator with the given configuration. If config has zero values, defaults are used for those fields.
type TokenEstimatorConfig ¶
type TokenEstimatorConfig struct {
// CharactersPerToken is the average number of characters per token.
// Default: 4.0 (reasonable for English text)
// Lower values (e.g., 3.0) give more conservative estimates.
CharactersPerToken float64
// CustomContextWindows allows overriding context window sizes for specific models.
// Keys should be model IDs (e.g., "gpt-4o", "claude-3-opus").
CustomContextWindows map[string]int
// TokenOverheadPerMessage is extra tokens added per message for formatting.
// Default: 4 (accounts for role, separators, etc.)
TokenOverheadPerMessage int
}
TokenEstimatorConfig configures token estimation behavior
func DefaultTokenEstimatorConfig ¶
func DefaultTokenEstimatorConfig() TokenEstimatorConfig
DefaultTokenEstimatorConfig returns a TokenEstimatorConfig with sensible defaults
type TokenLimitError ¶
type TokenLimitError struct {
// EstimatedTokens is the estimated prompt token count
EstimatedTokens int
// ContextWindow is the model's maximum context window
ContextWindow int
// AvailableTokens is how many tokens are available (may be negative)
AvailableTokens int
// Model is the model ID
Model string
}
TokenLimitError is returned when a request exceeds token limits
func (*TokenLimitError) Error ¶
func (e *TokenLimitError) Error() string
type TokenValidation ¶
type TokenValidation struct {
// EstimatedTokens is the estimated prompt token count
EstimatedTokens int
// ContextWindow is the model's maximum context window
ContextWindow int
// MaxCompletionTokens is the requested max completion tokens
MaxCompletionTokens int
// AvailableTokens is how many tokens are available for completion
// (ContextWindow - EstimatedTokens)
AvailableTokens int
// ExceedsLimit is true if the prompt exceeds the context window
ExceedsLimit bool
// ExceedsWithCompletion is true if prompt + max_tokens exceeds context
ExceedsWithCompletion bool
}
TokenValidation contains the result of token validation
func ValidateTokens ¶
func ValidateTokens(
	estimator TokenEstimator,
	model string,
	messages []provider.Message,
	maxCompletionTokens int,
) (*TokenValidation, error)
ValidateTokens checks if the request fits within model limits. Returns validation details including whether limits are exceeded.
type ToolCall ¶
type ToolCall = provider.ToolCall
Type aliases for backward compatibility and convenience. These allow thick providers to import from omnillm-core root package. Note: Provider and ChatCompletionStream are defined in provider.go
type ToolFunction ¶
type ToolFunction = provider.ToolFunction
Type aliases for backward compatibility and convenience. These allow thick providers to import from omnillm-core root package. Note: Provider and ChatCompletionStream are defined in provider.go
Source Files
¶
Directories
¶
| Path | Synopsis |
|---|---|
| examples | |
| examples/anthropic_streaming (command) | |
| examples/architecture_demo (command) | |
| examples/basic (command) | |
| examples/conversation (command) | |
| examples/custom_provider (command) | |
| examples/memory_demo (command) | |
| examples/ollama (command) | |
| examples/ollama_streaming (command) | |
| examples/providers_demo (command) | |
| examples/streaming (command) | |
| examples/xai (command) | |
| models | Package models provides a comprehensive catalog of LLM model identifiers and documentation references for all supported providers. |
| provider | Package provider defines the core interfaces that external LLM providers must implement. |
| provider/providertest | Package providertest provides conformance tests for LLM provider implementations. |
| providers | |
| providers/anthropic | Package anthropic provides Anthropic provider adapter for the OmniLLM unified interface |
| providers/glm | Package glm provides GLM (Zhipu AI) provider adapter for the OmniLLM unified interface |
| providers/kimi | Package kimi provides Kimi (Moonshot AI) provider adapter for the OmniLLM unified interface |
| providers/ollama | Package ollama provides Ollama provider adapter for the OmniLLM unified interface |
| providers/openai | Package openai provides OpenAI provider adapter for the OmniLLM unified interface |
| providers/qwen | Package qwen provides Qwen (Alibaba Cloud) provider adapter for the OmniLLM unified interface |
| providers/xai | Package xai provides X.AI Grok provider adapter for the OmniLLM unified interface |
| testing | Package testing provides mock implementations for testing |