cache

package
v0.4.2 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Nov 28, 2025 License: Apache-2.0 Imports: 13 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func CosineSimilarity

func CosineSimilarity(a, b []float32) float64

CosineSimilarity calculates the cosine similarity between two vectors. Returns a value between -1 and 1, where 1 means identical direction.

func DotProduct

func DotProduct(a, b []float32) float64

DotProduct calculates the dot product of two vectors

func EuclideanDistance

func EuclideanDistance(a, b []float32) float64

EuclideanDistance calculates the Euclidean distance between two vectors

func Normalize

func Normalize(v []float32) []float32

Normalize normalizes a vector to unit length

func WithSemanticCache

func WithSemanticCache(embeddingProvider EmbeddingProvider, config *SemanticCacheConfig) func(llm.Client) llm.Client

WithSemanticCache creates a middleware function for semantic caching

Types

type CacheEntry

type CacheEntry struct {
	// Key is the unique identifier for this entry.
	Key string `json:"key"`

	// Prompt is the original prompt text.
	Prompt string `json:"prompt"`

	// Embedding is the vector representation of the prompt,
	// used for semantic (similarity-based) lookups.
	Embedding []float32 `json:"embedding"`

	// Response is the cached LLM response.
	Response string `json:"response"`

	// Model is the LLM model used to produce Response.
	Model string `json:"model"`

	// TokensUsed is the number of tokens consumed by the original request.
	TokensUsed int `json:"tokens_used"`

	// CreatedAt is when this entry was created.
	CreatedAt time.Time `json:"created_at"`

	// AccessedAt is when this entry was last accessed
	// (presumably updated on each cache hit — confirm against implementation).
	AccessedAt time.Time `json:"accessed_at"`

	// HitCount is the number of times this entry was accessed.
	HitCount int64 `json:"hit_count"`

	// Metadata contains additional information.
	// `any` is the Go 1.18+ alias for interface{}; the type is identical.
	Metadata map[string]any `json:"metadata,omitempty"`
}

CacheEntry represents a cached LLM response

func FindAboveThreshold

func FindAboveThreshold(target []float32, entries []*CacheEntry, threshold float64) []*CacheEntry

FindAboveThreshold finds all entries with similarity above threshold

func FindMostSimilar

func FindMostSimilar(target []float32, entries []*CacheEntry) (*CacheEntry, float64, int)

FindMostSimilar finds the most similar entry from a list. Returns the entry, similarity score, and index.

type CacheStats

type CacheStats struct {
	// TotalEntries is the number of entries currently in the cache.
	TotalEntries int64 `json:"total_entries"`

	// TotalHits is the total number of cache hits.
	TotalHits int64 `json:"total_hits"`

	// TotalMisses is the total number of cache misses.
	TotalMisses int64 `json:"total_misses"`

	// HitRate is the cache hit rate (0.0 - 1.0).
	// Presumably TotalHits / (TotalHits + TotalMisses) — confirm against implementation.
	HitRate float64 `json:"hit_rate"`

	// AverageSimilarity is the average similarity score of hits.
	AverageSimilarity float64 `json:"average_similarity"`

	// TokensSaved is the estimated tokens saved by caching.
	TokensSaved int64 `json:"tokens_saved"`

	// LatencySaved is the estimated latency saved (in ms).
	LatencySaved int64 `json:"latency_saved_ms"`

	// MemoryUsed is the memory used by the cache (in bytes).
	MemoryUsed int64 `json:"memory_used_bytes"`
}

CacheStats contains cache performance statistics

type CacheStorage

type CacheStorage interface {
	// Store saves a cache entry, overwriting any existing entry
	// with the same key — TODO confirm overwrite semantics.
	Store(ctx context.Context, entry *CacheEntry) error

	// Load retrieves a cache entry by key.
	// Error behavior for a missing key is implementation-defined — confirm.
	Load(ctx context.Context, key string) (*CacheEntry, error)

	// Delete removes a cache entry by key.
	Delete(ctx context.Context, key string) error

	// List returns all cache entries.
	List(ctx context.Context) ([]*CacheEntry, error)

	// Clear removes all entries.
	Clear(ctx context.Context) error

	// Count returns the number of entries.
	Count(ctx context.Context) (int64, error)

	// Close closes the storage and releases any held resources.
	Close() error
}

CacheStorage defines the storage backend for cache entries

type CachedLLMClient

type CachedLLMClient struct {
	// contains filtered or unexported fields
}

CachedLLMClient wraps an LLM client with semantic caching

func NewCachedLLMClient

func NewCachedLLMClient(config *CachedLLMClientConfig) (*CachedLLMClient, error)

NewCachedLLMClient creates a new cached LLM client

func (*CachedLLMClient) Chat

func (c *CachedLLMClient) Chat(ctx context.Context, messages []llm.Message) (*llm.CompletionResponse, error)

Chat sends a chat request with semantic caching

func (*CachedLLMClient) ClearCache

func (c *CachedLLMClient) ClearCache(ctx context.Context) error

ClearCache clears the semantic cache

func (*CachedLLMClient) Close

func (c *CachedLLMClient) Close() error

Close closes the cached client

func (*CachedLLMClient) Complete

Complete sends a completion request with semantic caching

func (*CachedLLMClient) IsAvailable

func (c *CachedLLMClient) IsAvailable() bool

IsAvailable checks if the client is available

func (*CachedLLMClient) Provider

func (c *CachedLLMClient) Provider() constants.Provider

Provider returns the underlying provider type

func (*CachedLLMClient) Stats

func (c *CachedLLMClient) Stats() *CacheStats

Stats returns cache statistics

type CachedLLMClientConfig

type CachedLLMClientConfig struct {
	// Client is the underlying LLM client that handles cache misses.
	Client llm.Client

	// EmbeddingProvider generates embeddings for semantic matching.
	EmbeddingProvider EmbeddingProvider

	// CacheConfig configures the semantic cache.
	// Presumably nil falls back to DefaultSemanticCacheConfig() — confirm.
	CacheConfig *SemanticCacheConfig
}

CachedLLMClientConfig configures the cached LLM client

type EmbeddingProvider

type EmbeddingProvider interface {
	// Embed generates an embedding vector for the given text.
	Embed(ctx context.Context, text string) ([]float32, error)

	// EmbedBatch generates embeddings for multiple texts.
	// Results are presumably returned in input order — confirm with implementations.
	EmbedBatch(ctx context.Context, texts []string) ([][]float32, error)

	// Dimension returns the embedding dimension (length of each vector).
	Dimension() int
}

EmbeddingProvider generates vector embeddings for text

type MemorySemanticCache

type MemorySemanticCache struct {
	// contains filtered or unexported fields
}

MemorySemanticCache implements SemanticCache with in-memory storage

func NewMemorySemanticCache

func NewMemorySemanticCache(provider EmbeddingProvider, config *SemanticCacheConfig) *MemorySemanticCache

NewMemorySemanticCache creates a new in-memory semantic cache

func (*MemorySemanticCache) Clear

func (c *MemorySemanticCache) Clear(ctx context.Context) error

Clear removes all entries from cache

func (*MemorySemanticCache) Close

func (c *MemorySemanticCache) Close() error

Close closes the cache and releases resources

func (*MemorySemanticCache) Delete

func (c *MemorySemanticCache) Delete(ctx context.Context, key string) error

Delete removes an entry from cache

func (*MemorySemanticCache) Get

func (c *MemorySemanticCache) Get(ctx context.Context, prompt string, model string) (*CacheEntry, float64, error)

Get retrieves a cached response if similarity >= threshold

func (*MemorySemanticCache) Set

func (c *MemorySemanticCache) Set(ctx context.Context, prompt string, response string, model string, tokensUsed int) error

Set stores a response in the cache

func (*MemorySemanticCache) Stats

func (c *MemorySemanticCache) Stats() *CacheStats

Stats returns cache statistics

type MockEmbeddingProvider

type MockEmbeddingProvider struct {
	// contains filtered or unexported fields
}

MockEmbeddingProvider is a mock provider for testing

func NewMockEmbeddingProvider

func NewMockEmbeddingProvider(dimension int) *MockEmbeddingProvider

NewMockEmbeddingProvider creates a mock embedding provider

func (*MockEmbeddingProvider) Dimension

func (p *MockEmbeddingProvider) Dimension() int

Dimension returns the embedding dimension

func (*MockEmbeddingProvider) Embed

func (p *MockEmbeddingProvider) Embed(ctx context.Context, text string) ([]float32, error)

Embed returns the predefined embedding or generates a simple one

func (*MockEmbeddingProvider) EmbedBatch

func (p *MockEmbeddingProvider) EmbedBatch(ctx context.Context, texts []string) ([][]float32, error)

EmbedBatch generates embeddings for multiple texts

func (*MockEmbeddingProvider) SetEmbedding

func (p *MockEmbeddingProvider) SetEmbedding(text string, embedding []float32)

SetEmbedding sets a predefined embedding for a text

type OpenAIEmbeddingConfig

type OpenAIEmbeddingConfig struct {
	// APIKey is the OpenAI API key
	APIKey string

	// BaseURL is the optional custom base URL
	BaseURL string

	// Model is the embedding model to use
	// Default: text-embedding-3-small
	Model openai.EmbeddingModel

	// Dimension is the embedding dimension
	// Default: 1536 for text-embedding-3-small
	Dimension int
}

OpenAIEmbeddingConfig configures the OpenAI embedding provider

type OpenAIEmbeddingProvider

type OpenAIEmbeddingProvider struct {
	// contains filtered or unexported fields
}

OpenAIEmbeddingProvider uses OpenAI's embedding API

func NewOpenAIEmbeddingProvider

func NewOpenAIEmbeddingProvider(config *OpenAIEmbeddingConfig) *OpenAIEmbeddingProvider

NewOpenAIEmbeddingProvider creates a new OpenAI embedding provider

func (*OpenAIEmbeddingProvider) Dimension

func (p *OpenAIEmbeddingProvider) Dimension() int

Dimension returns the embedding dimension

func (*OpenAIEmbeddingProvider) Embed

func (p *OpenAIEmbeddingProvider) Embed(ctx context.Context, text string) ([]float32, error)

Embed generates an embedding for the given text

func (*OpenAIEmbeddingProvider) EmbedBatch

func (p *OpenAIEmbeddingProvider) EmbedBatch(ctx context.Context, texts []string) ([][]float32, error)

EmbedBatch generates embeddings for multiple texts

type SemanticCache

type SemanticCache interface {
	// Get retrieves a cached response if similarity >= threshold.
	// Returns the entry and similarity score if found.
	// Miss behavior (nil entry vs. error) is implementation-defined — confirm.
	Get(ctx context.Context, prompt string, model string) (*CacheEntry, float64, error)

	// Set stores a response in the cache, keyed by the prompt's embedding.
	Set(ctx context.Context, prompt string, response string, model string, tokensUsed int) error

	// Delete removes an entry from the cache by key.
	Delete(ctx context.Context, key string) error

	// Clear removes all entries from the cache.
	Clear(ctx context.Context) error

	// Stats returns cache statistics.
	Stats() *CacheStats

	// Close closes the cache and releases resources.
	Close() error
}

SemanticCache defines the interface for semantic caching

type SemanticCacheConfig

type SemanticCacheConfig struct {
	// SimilarityThreshold is the minimum similarity score for a cache hit (0.0 - 1.0).
	// Default: 0.95
	SimilarityThreshold float64 `json:"similarity_threshold"`

	// MaxEntries is the maximum number of entries in the cache before eviction.
	// Default: 10000
	MaxEntries int `json:"max_entries"`

	// TTL is the time-to-live for cache entries.
	// Default: 24 hours
	TTL time.Duration `json:"ttl"`

	// EnableStats enables statistics collection.
	// Default: true
	EnableStats bool `json:"enable_stats"`

	// EvictionPolicy determines how entries are evicted when the cache is full.
	// Options: "lru", "lfu", "fifo"
	// Default: "lru"
	EvictionPolicy string `json:"eviction_policy"`

	// ModelSpecific, if true, makes caches model-specific
	// (a hit requires the same model as the cached entry).
	// Default: true
	ModelSpecific bool `json:"model_specific"`

	// NormalizePrompts, if true, normalizes prompts before caching.
	// Default: true
	NormalizePrompts bool `json:"normalize_prompts"`
}

SemanticCacheConfig configures the semantic cache

func DefaultSemanticCacheConfig

func DefaultSemanticCacheConfig() *SemanticCacheConfig

DefaultSemanticCacheConfig returns default configuration

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL