Documentation
¶
Index ¶
- func CosineSimilarity(a, b []float32) float64
- func DotProduct(a, b []float32) float64
- func EuclideanDistance(a, b []float32) float64
- func Normalize(v []float32) []float32
- func WithSemanticCache(embeddingProvider EmbeddingProvider, config *SemanticCacheConfig) func(llm.Client) llm.Client
- type CacheEntry
- func FindAboveThreshold(target []float32, entries []*CacheEntry, threshold float64) []*CacheEntry
- func FindMostSimilar(target []float32, entries []*CacheEntry) (*CacheEntry, float64, int)
- type CacheStats
- type CacheStorage
- type CachedLLMClient
- func (c *CachedLLMClient) Chat(ctx context.Context, messages []llm.Message) (*llm.CompletionResponse, error)
- func (c *CachedLLMClient) ClearCache(ctx context.Context) error
- func (c *CachedLLMClient) Close() error
- func (c *CachedLLMClient) Complete(ctx context.Context, req *llm.CompletionRequest) (*llm.CompletionResponse, error)
- func (c *CachedLLMClient) IsAvailable() bool
- func (c *CachedLLMClient) Provider() constants.Provider
- func (c *CachedLLMClient) Stats() *CacheStats
- type CachedLLMClientConfig
- type EmbeddingProvider
- type MemorySemanticCache
- func (c *MemorySemanticCache) Clear(ctx context.Context) error
- func (c *MemorySemanticCache) Close() error
- func (c *MemorySemanticCache) Delete(ctx context.Context, key string) error
- func (c *MemorySemanticCache) Get(ctx context.Context, prompt string, model string) (*CacheEntry, float64, error)
- func (c *MemorySemanticCache) Set(ctx context.Context, prompt string, response string, model string, tokensUsed int) error
- func (c *MemorySemanticCache) Stats() *CacheStats
- type MockEmbeddingProvider
- func (p *MockEmbeddingProvider) Dimension() int
- func (p *MockEmbeddingProvider) Embed(ctx context.Context, text string) ([]float32, error)
- func (p *MockEmbeddingProvider) EmbedBatch(ctx context.Context, texts []string) ([][]float32, error)
- func (p *MockEmbeddingProvider) SetEmbedding(text string, embedding []float32)
- type OpenAIEmbeddingConfig
- type OpenAIEmbeddingProvider
- type SemanticCache
- type SemanticCacheConfig
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func CosineSimilarity ¶
func CosineSimilarity(a, b []float32) float64
CosineSimilarity calculates the cosine similarity between two vectors. Returns a value between -1 and 1, where 1 means identical direction.
func DotProduct ¶
func DotProduct(a, b []float32) float64
DotProduct calculates the dot product of two vectors.
func EuclideanDistance ¶
func EuclideanDistance(a, b []float32) float64
EuclideanDistance calculates the Euclidean distance between two vectors.
func WithSemanticCache ¶
func WithSemanticCache(embeddingProvider EmbeddingProvider, config *SemanticCacheConfig) func(llm.Client) llm.Client
WithSemanticCache creates a middleware function for semantic caching
Types ¶
type CacheEntry ¶
type CacheEntry struct {
// Key is the unique identifier for this entry
Key string `json:"key"`
// Prompt is the original prompt text
Prompt string `json:"prompt"`
// Embedding is the vector representation of the prompt
Embedding []float32 `json:"embedding"`
// Response is the cached LLM response
Response string `json:"response"`
// Model is the LLM model used
Model string `json:"model"`
// TokensUsed is the number of tokens consumed
TokensUsed int `json:"tokens_used"`
// CreatedAt is when this entry was created
CreatedAt time.Time `json:"created_at"`
// AccessedAt is when this entry was last accessed
AccessedAt time.Time `json:"accessed_at"`
// HitCount is the number of times this entry was accessed
HitCount int64 `json:"hit_count"`
// Metadata contains additional information
Metadata map[string]interface{} `json:"metadata,omitempty"`
}
CacheEntry represents a cached LLM response
func FindAboveThreshold ¶
func FindAboveThreshold(target []float32, entries []*CacheEntry, threshold float64) []*CacheEntry
FindAboveThreshold finds all entries with similarity above threshold
func FindMostSimilar ¶
func FindMostSimilar(target []float32, entries []*CacheEntry) (*CacheEntry, float64, int)
FindMostSimilar finds the most similar entry from a list. Returns the entry, similarity score, and index.
type CacheStats ¶
type CacheStats struct {
// TotalEntries is the number of entries in cache
TotalEntries int64 `json:"total_entries"`
// TotalHits is the total number of cache hits
TotalHits int64 `json:"total_hits"`
// TotalMisses is the total number of cache misses
TotalMisses int64 `json:"total_misses"`
// HitRate is the cache hit rate (0.0 - 1.0)
HitRate float64 `json:"hit_rate"`
// AverageSimilarity is the average similarity score of hits
AverageSimilarity float64 `json:"average_similarity"`
// TokensSaved is the estimated tokens saved by caching
TokensSaved int64 `json:"tokens_saved"`
// LatencySaved is the estimated latency saved (in ms)
LatencySaved int64 `json:"latency_saved_ms"`
// MemoryUsed is the memory used by cache (in bytes)
MemoryUsed int64 `json:"memory_used_bytes"`
}
CacheStats contains cache performance statistics
type CacheStorage ¶
type CacheStorage interface {
// Store saves a cache entry
Store(ctx context.Context, entry *CacheEntry) error
// Load retrieves a cache entry by key
Load(ctx context.Context, key string) (*CacheEntry, error)
// Delete removes a cache entry
Delete(ctx context.Context, key string) error
// List returns all cache entries
List(ctx context.Context) ([]*CacheEntry, error)
// Clear removes all entries
Clear(ctx context.Context) error
// Count returns the number of entries
Count(ctx context.Context) (int64, error)
// Close closes the storage
Close() error
}
CacheStorage defines the storage backend for cache entries
type CachedLLMClient ¶
type CachedLLMClient struct {
// contains filtered or unexported fields
}
CachedLLMClient wraps an LLM client with semantic caching
func NewCachedLLMClient ¶
func NewCachedLLMClient(config *CachedLLMClientConfig) (*CachedLLMClient, error)
NewCachedLLMClient creates a new cached LLM client
func (*CachedLLMClient) Chat ¶
func (c *CachedLLMClient) Chat(ctx context.Context, messages []llm.Message) (*llm.CompletionResponse, error)
Chat sends a chat request with semantic caching
func (*CachedLLMClient) ClearCache ¶
func (c *CachedLLMClient) ClearCache(ctx context.Context) error
ClearCache clears the semantic cache
func (*CachedLLMClient) Close ¶
func (c *CachedLLMClient) Close() error
Close closes the cached client
func (*CachedLLMClient) Complete ¶
func (c *CachedLLMClient) Complete(ctx context.Context, req *llm.CompletionRequest) (*llm.CompletionResponse, error)
Complete sends a completion request with semantic caching
func (*CachedLLMClient) IsAvailable ¶
func (c *CachedLLMClient) IsAvailable() bool
IsAvailable checks if the client is available
func (*CachedLLMClient) Provider ¶
func (c *CachedLLMClient) Provider() constants.Provider
Provider returns the underlying provider type
func (*CachedLLMClient) Stats ¶
func (c *CachedLLMClient) Stats() *CacheStats
Stats returns cache statistics
type CachedLLMClientConfig ¶
type CachedLLMClientConfig struct {
// Client is the underlying LLM client
Client llm.Client
// EmbeddingProvider generates embeddings for semantic matching
EmbeddingProvider EmbeddingProvider
// CacheConfig configures the semantic cache
CacheConfig *SemanticCacheConfig
}
CachedLLMClientConfig configures the cached LLM client
type EmbeddingProvider ¶
type EmbeddingProvider interface {
// Embed generates an embedding vector for the given text
Embed(ctx context.Context, text string) ([]float32, error)
// EmbedBatch generates embeddings for multiple texts
EmbedBatch(ctx context.Context, texts []string) ([][]float32, error)
// Dimension returns the embedding dimension
Dimension() int
}
EmbeddingProvider generates vector embeddings for text
type MemorySemanticCache ¶
type MemorySemanticCache struct {
// contains filtered or unexported fields
}
MemorySemanticCache implements SemanticCache with in-memory storage
func NewMemorySemanticCache ¶
func NewMemorySemanticCache(provider EmbeddingProvider, config *SemanticCacheConfig) *MemorySemanticCache
NewMemorySemanticCache creates a new in-memory semantic cache
func (*MemorySemanticCache) Clear ¶
func (c *MemorySemanticCache) Clear(ctx context.Context) error
Clear removes all entries from cache
func (*MemorySemanticCache) Close ¶
func (c *MemorySemanticCache) Close() error
Close closes the cache and releases resources
func (*MemorySemanticCache) Delete ¶
func (c *MemorySemanticCache) Delete(ctx context.Context, key string) error
Delete removes an entry from cache
func (*MemorySemanticCache) Get ¶
func (c *MemorySemanticCache) Get(ctx context.Context, prompt string, model string) (*CacheEntry, float64, error)
Get retrieves a cached response if similarity >= threshold
func (*MemorySemanticCache) Set ¶
func (c *MemorySemanticCache) Set(ctx context.Context, prompt string, response string, model string, tokensUsed int) error
Set stores a response in the cache
func (*MemorySemanticCache) Stats ¶
func (c *MemorySemanticCache) Stats() *CacheStats
Stats returns cache statistics
type MockEmbeddingProvider ¶
type MockEmbeddingProvider struct {
// contains filtered or unexported fields
}
MockEmbeddingProvider is a mock provider for testing
func NewMockEmbeddingProvider ¶
func NewMockEmbeddingProvider(dimension int) *MockEmbeddingProvider
NewMockEmbeddingProvider creates a mock embedding provider
func (*MockEmbeddingProvider) Dimension ¶
func (p *MockEmbeddingProvider) Dimension() int
Dimension returns the embedding dimension
func (*MockEmbeddingProvider) Embed ¶
func (p *MockEmbeddingProvider) Embed(ctx context.Context, text string) ([]float32, error)
Embed returns the predefined embedding or generates a simple one.
func (*MockEmbeddingProvider) EmbedBatch ¶
func (p *MockEmbeddingProvider) EmbedBatch(ctx context.Context, texts []string) ([][]float32, error)
EmbedBatch generates embeddings for multiple texts
func (*MockEmbeddingProvider) SetEmbedding ¶
func (p *MockEmbeddingProvider) SetEmbedding(text string, embedding []float32)
SetEmbedding sets a predefined embedding for a text
type OpenAIEmbeddingConfig ¶
type OpenAIEmbeddingConfig struct {
// APIKey is the OpenAI API key
APIKey string
// BaseURL is the optional custom base URL
BaseURL string
// Model is the embedding model to use
// Default: text-embedding-3-small
Model openai.EmbeddingModel
// Dimension is the embedding dimension
// Default: 1536 for text-embedding-3-small
Dimension int
}
OpenAIEmbeddingConfig configures the OpenAI embedding provider
type OpenAIEmbeddingProvider ¶
type OpenAIEmbeddingProvider struct {
// contains filtered or unexported fields
}
OpenAIEmbeddingProvider uses OpenAI's embedding API
func NewOpenAIEmbeddingProvider ¶
func NewOpenAIEmbeddingProvider(config *OpenAIEmbeddingConfig) *OpenAIEmbeddingProvider
NewOpenAIEmbeddingProvider creates a new OpenAI embedding provider
func (*OpenAIEmbeddingProvider) Dimension ¶
func (p *OpenAIEmbeddingProvider) Dimension() int
Dimension returns the embedding dimension
func (*OpenAIEmbeddingProvider) EmbedBatch ¶
func (p *OpenAIEmbeddingProvider) EmbedBatch(ctx context.Context, texts []string) ([][]float32, error)
EmbedBatch generates embeddings for multiple texts
type SemanticCache ¶
type SemanticCache interface {
// Get retrieves a cached response if similarity >= threshold
// Returns the entry and similarity score if found
Get(ctx context.Context, prompt string, model string) (*CacheEntry, float64, error)
// Set stores a response in the cache
Set(ctx context.Context, prompt string, response string, model string, tokensUsed int) error
// Delete removes an entry from cache
Delete(ctx context.Context, key string) error
// Clear removes all entries from cache
Clear(ctx context.Context) error
// Stats returns cache statistics
Stats() *CacheStats
// Close closes the cache and releases resources
Close() error
}
SemanticCache defines the interface for semantic caching
type SemanticCacheConfig ¶
type SemanticCacheConfig struct {
// SimilarityThreshold is the minimum similarity score for a cache hit (0.0 - 1.0)
// Default: 0.95
SimilarityThreshold float64 `json:"similarity_threshold"`
// MaxEntries is the maximum number of entries in cache
// Default: 10000
MaxEntries int `json:"max_entries"`
// TTL is the time-to-live for cache entries
// Default: 24 hours
TTL time.Duration `json:"ttl"`
// EnableStats enables statistics collection
// Default: true
EnableStats bool `json:"enable_stats"`
// EvictionPolicy determines how entries are evicted when cache is full
// Options: "lru", "lfu", "fifo"
// Default: "lru"
EvictionPolicy string `json:"eviction_policy"`
// ModelSpecific if true, caches are model-specific
// Default: true
ModelSpecific bool `json:"model_specific"`
// NormalizePrompts if true, normalizes prompts before caching
// Default: true
NormalizePrompts bool `json:"normalize_prompts"`
}
SemanticCacheConfig configures the semantic cache
func DefaultSemanticCacheConfig ¶
func DefaultSemanticCacheConfig() *SemanticCacheConfig
DefaultSemanticCacheConfig returns default configuration