Documentation
¶
Overview ¶
Package semanticcache provides a semantic caching plugin for Bifrost. The plugin caches responses using both direct hash matching (xxhash) and semantic similarity search (embeddings). It supports configurable caching behavior via the VectorStore abstraction, with TTL management and streaming response handling.
Index ¶
- Constants
- Variables
- func AddUserMessage(messages []schemas.ChatMessage, userMessage string) []schemas.ChatMessage
- func AssertCacheHit(t *testing.T, response *schemas.BifrostResponse, expectedCacheType string)
- func AssertNoCacheHit(t *testing.T, response *schemas.BifrostResponse)
- func BuildConversationHistory(systemPrompt string, userAssistantPairs ...[]string) []schemas.ChatMessage
- func CreateBasicChatRequest(content string, temperature float64, maxTokens int) *schemas.BifrostChatRequest
- func CreateBasicResponsesRequest(content string, temperature float64, maxTokens int) *schemas.BifrostResponsesRequest
- func CreateContextWithCacheKey(value string) *schemas.BifrostContext
- func CreateContextWithCacheKeyAndNoStore(value string, noStore bool) *schemas.BifrostContext
- func CreateContextWithCacheKeyAndTTL(value string, ttl time.Duration) *schemas.BifrostContext
- func CreateContextWithCacheKeyAndThreshold(value string, threshold float64) *schemas.BifrostContext
- func CreateContextWithCacheKeyAndType(value string, cacheType CacheType) *schemas.BifrostContext
- func CreateConversationRequest(messages []schemas.ChatMessage, temperature float64, maxTokens int) *schemas.BifrostChatRequest
- func CreateEmbeddingRequest(texts []string) *schemas.BifrostEmbeddingRequest
- func CreateImageGenerationRequest(prompt string, size string, quality string) *schemas.BifrostImageGenerationRequest
- func CreateResponsesRequestWithInstructions(content string, instructions string, temperature float64, maxTokens int) *schemas.BifrostResponsesRequest
- func CreateResponsesRequestWithTools(content string, temperature float64, maxTokens int, ...) *schemas.BifrostResponsesRequest
- func CreateSpeechRequest(input string, voice string) *schemas.BifrostSpeechRequest
- func CreateStreamingChatRequest(content string, temperature float64, maxTokens int) *schemas.BifrostChatRequest
- func CreateStreamingResponsesRequest(content string, temperature float64, maxTokens int) *schemas.BifrostResponsesRequest
- func Init(ctx context.Context, config *Config, logger schemas.Logger, ...) (schemas.LLMPlugin, error)
- func WaitForCache()
- type BaseAccount
- func (baseAccount *BaseAccount) GetConfigForProvider(providerKey schemas.ModelProvider) (*schemas.ProviderConfig, error)
- func (baseAccount *BaseAccount) GetConfiguredProviders() ([]schemas.ModelProvider, error)
- func (baseAccount *BaseAccount) GetKeysForProvider(ctx context.Context, providerKey schemas.ModelProvider) ([]schemas.Key, error)
- type CacheType
- type Config
- type Plugin
- func (plugin *Plugin) Cleanup() error
- func (plugin *Plugin) ClearCacheForKey(cacheKey string) error
- func (plugin *Plugin) ClearCacheForRequestID(requestID string) error
- func (plugin *Plugin) GetName() string
- func (plugin *Plugin) HTTPTransportPostHook(ctx *schemas.BifrostContext, req *schemas.HTTPRequest, ...) error
- func (plugin *Plugin) HTTPTransportPreHook(ctx *schemas.BifrostContext, req *schemas.HTTPRequest) (*schemas.HTTPResponse, error)
- func (plugin *Plugin) HTTPTransportStreamChunkHook(ctx *schemas.BifrostContext, req *schemas.HTTPRequest, ...) (*schemas.BifrostStreamChunk, error)
- func (plugin *Plugin) PostLLMHook(ctx *schemas.BifrostContext, res *schemas.BifrostResponse, ...) (*schemas.BifrostResponse, *schemas.BifrostError, error)
- func (plugin *Plugin) PreLLMHook(ctx *schemas.BifrostContext, req *schemas.BifrostRequest) (*schemas.BifrostRequest, *schemas.LLMPluginShortCircuit, error)
- func (plugin *Plugin) WaitForPendingOperations()
- type PluginAccount
- func (pa *PluginAccount) GetConfigForProvider(providerKey schemas.ModelProvider) (*schemas.ProviderConfig, error)
- func (pa *PluginAccount) GetConfiguredProviders() ([]schemas.ModelProvider, error)
- func (pa *PluginAccount) GetKeysForProvider(ctx context.Context, providerKey schemas.ModelProvider) ([]schemas.Key, error)
- type RetryConfig
- type StreamAccumulator
- type StreamChunk
- type TestSetup
- func CreateTestSetupWithConversationThreshold(t *testing.T, threshold int) *TestSetup
- func CreateTestSetupWithExcludeSystemPrompt(t *testing.T, excludeSystem bool) *TestSetup
- func CreateTestSetupWithThresholdAndExcludeSystem(t *testing.T, threshold int, excludeSystem bool) *TestSetup
- func NewTestSetup(t *testing.T) *TestSetup
- func NewTestSetupWithConfig(t *testing.T, config *Config) *TestSetup
- func NewTestSetupWithVectorStore(t *testing.T, config *Config, storeType vectorstore.VectorStoreType) *TestSetup
Constants ¶
const (
	PluginName string = "semantic_cache"
	DefaultVectorStoreNamespace string = "BifrostSemanticCachePlugin"
	PluginLoggerPrefix string = "[Semantic Cache]"
	CacheConnectionTimeout time.Duration = 5 * time.Second
	CreateNamespaceTimeout time.Duration = 30 * time.Second
	CacheSetTimeout time.Duration = 30 * time.Second
	DefaultCacheTTL time.Duration = 5 * time.Minute
	DefaultCacheThreshold float64 = 0.8
	DefaultConversationHistoryThreshold int = 3
)
Plugin constants
const (
	CacheKey schemas.BifrostContextKey = "semantic_cache_key" // To set the cache key for a request - REQUIRED for all requests
	CacheTTLKey schemas.BifrostContextKey = "semantic_cache_ttl" // To explicitly set the TTL for a request
	CacheThresholdKey schemas.BifrostContextKey = "semantic_cache_threshold" // To explicitly set the threshold for a request
	CacheTypeKey schemas.BifrostContextKey = "semantic_cache_cache_type" // To explicitly set the cache type for a request
	CacheNoStoreKey schemas.BifrostContextKey = "semantic_cache_no_store" // To explicitly disable storing the response in the cache
)
Variables ¶
var Dependencies []framework.FrameworkDependency = []framework.FrameworkDependency{framework.FrameworkDependencyVectorStore}
Dependencies is a list of dependencies that the plugin requires.
var SelectFields = []string{"request_hash", "response", "stream_chunks", "expires_at", "cache_key", "provider", "model"}
var VectorStoreProperties = map[string]vectorstore.VectorStoreProperties{
	"request_hash": {
		DataType: vectorstore.VectorStorePropertyTypeString,
		Description: "The hash of the request",
	},
	"response": {
		DataType: vectorstore.VectorStorePropertyTypeString,
		Description: "The response from the provider",
	},
	"stream_chunks": {
		DataType: vectorstore.VectorStorePropertyTypeStringArray,
		Description: "The stream chunks from the provider",
	},
	"expires_at": {
		DataType: vectorstore.VectorStorePropertyTypeInteger,
		Description: "The expiration time of the cache entry",
	},
	"cache_key": {
		DataType: vectorstore.VectorStorePropertyTypeString,
		Description: "The cache key from the request",
	},
	"provider": {
		DataType: vectorstore.VectorStorePropertyTypeString,
		Description: "The provider used for the request",
	},
	"model": {
		DataType: vectorstore.VectorStorePropertyTypeString,
		Description: "The model used for the request",
	},
	"params_hash": {
		DataType: vectorstore.VectorStorePropertyTypeString,
		Description: "The hash of the parameters used for the request",
	},
	"from_bifrost_semantic_cache_plugin": {
		DataType: vectorstore.VectorStorePropertyTypeBoolean,
		Description: "Whether the cache entry was created by the BifrostSemanticCachePlugin",
	},
}
Functions ¶
func AddUserMessage ¶ added in v1.2.6
func AddUserMessage(messages []schemas.ChatMessage, userMessage string) []schemas.ChatMessage
AddUserMessage adds a user message to an existing conversation
func AssertCacheHit ¶
func AssertCacheHit(t *testing.T, response *schemas.BifrostResponse, expectedCacheType string)
AssertCacheHit verifies that a response was served from cache
func AssertNoCacheHit ¶
func AssertNoCacheHit(t *testing.T, response *schemas.BifrostResponse)
AssertNoCacheHit verifies that a response was NOT served from cache
func BuildConversationHistory ¶ added in v1.2.6
func BuildConversationHistory(systemPrompt string, userAssistantPairs ...[]string) []schemas.ChatMessage
BuildConversationHistory creates a conversation history from pairs of user/assistant messages
func CreateBasicChatRequest ¶
func CreateBasicChatRequest(content string, temperature float64, maxTokens int) *schemas.BifrostChatRequest
CreateBasicChatRequest creates a basic chat completion request for testing
func CreateBasicResponsesRequest ¶ added in v1.3.0
func CreateBasicResponsesRequest(content string, temperature float64, maxTokens int) *schemas.BifrostResponsesRequest
CreateBasicResponsesRequest creates a basic Responses API request for testing
func CreateContextWithCacheKey ¶
func CreateContextWithCacheKey(value string) *schemas.BifrostContext
CreateContextWithCacheKey creates a context with the test cache key
func CreateContextWithCacheKeyAndNoStore ¶ added in v1.2.6
func CreateContextWithCacheKeyAndNoStore(value string, noStore bool) *schemas.BifrostContext
CreateContextWithCacheKeyAndNoStore creates a context with cache key and no-store flag
func CreateContextWithCacheKeyAndTTL ¶ added in v1.2.6
func CreateContextWithCacheKeyAndTTL(value string, ttl time.Duration) *schemas.BifrostContext
CreateContextWithCacheKeyAndTTL creates a context with cache key and custom TTL
func CreateContextWithCacheKeyAndThreshold ¶ added in v1.2.6
func CreateContextWithCacheKeyAndThreshold(value string, threshold float64) *schemas.BifrostContext
CreateContextWithCacheKeyAndThreshold creates a context with cache key and custom threshold
func CreateContextWithCacheKeyAndType ¶ added in v1.2.6
func CreateContextWithCacheKeyAndType(value string, cacheType CacheType) *schemas.BifrostContext
CreateContextWithCacheKeyAndType creates a context with cache key and cache type
func CreateConversationRequest ¶ added in v1.2.6
func CreateConversationRequest(messages []schemas.ChatMessage, temperature float64, maxTokens int) *schemas.BifrostChatRequest
CreateConversationRequest creates a chat request with conversation history
func CreateEmbeddingRequest ¶ added in v1.2.6
func CreateEmbeddingRequest(texts []string) *schemas.BifrostEmbeddingRequest
CreateEmbeddingRequest creates an embedding request for testing
func CreateImageGenerationRequest ¶ added in v1.4.9
func CreateImageGenerationRequest(prompt string, size string, quality string) *schemas.BifrostImageGenerationRequest
CreateImageGenerationRequest creates an image generation request for testing
func CreateResponsesRequestWithInstructions ¶ added in v1.3.0
func CreateResponsesRequestWithInstructions(content string, instructions string, temperature float64, maxTokens int) *schemas.BifrostResponsesRequest
CreateResponsesRequestWithInstructions creates a Responses API request with system instructions
func CreateResponsesRequestWithTools ¶ added in v1.3.0
func CreateResponsesRequestWithTools(content string, temperature float64, maxTokens int, tools []schemas.ResponsesTool) *schemas.BifrostResponsesRequest
CreateResponsesRequestWithTools creates a Responses API request with tools for testing
func CreateSpeechRequest ¶
func CreateSpeechRequest(input string, voice string) *schemas.BifrostSpeechRequest
CreateSpeechRequest creates a speech synthesis request for testing
func CreateStreamingChatRequest ¶
func CreateStreamingChatRequest(content string, temperature float64, maxTokens int) *schemas.BifrostChatRequest
CreateStreamingChatRequest creates a streaming chat completion request for testing
func CreateStreamingResponsesRequest ¶ added in v1.3.0
func CreateStreamingResponsesRequest(content string, temperature float64, maxTokens int) *schemas.BifrostResponsesRequest
CreateStreamingResponsesRequest creates a streaming Responses API request for testing
func Init ¶
func Init(ctx context.Context, config *Config, logger schemas.Logger, store vectorstore.VectorStore) (schemas.LLMPlugin, error)
Init creates a new semantic cache plugin instance with the provided configuration. It uses the VectorStore abstraction for cache operations and returns a configured plugin.
The VectorStore handles the underlying storage implementation and its defaults. The plugin only sets defaults for its own behavior (TTL, cache key generation, etc.).
Parameters:
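- config: Semantic cache and plugin configuration (the embedding Provider and Keys are required for semantic caching; the cache key itself is supplied per request via the CacheKey context key)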
- logger: Logger instance for the plugin
- store: VectorStore instance for cache operations
Returns:
- schemas.LLMPlugin: A configured semantic cache plugin instance
- error: Any error that occurred during plugin initialization
Types ¶
type BaseAccount ¶
type BaseAccount struct{}
BaseAccount implements the schemas.Account interface for testing purposes.
func (*BaseAccount) GetConfigForProvider ¶
func (baseAccount *BaseAccount) GetConfigForProvider(providerKey schemas.ModelProvider) (*schemas.ProviderConfig, error)
func (*BaseAccount) GetConfiguredProviders ¶
func (baseAccount *BaseAccount) GetConfiguredProviders() ([]schemas.ModelProvider, error)
func (*BaseAccount) GetKeysForProvider ¶
func (baseAccount *BaseAccount) GetKeysForProvider(ctx context.Context, providerKey schemas.ModelProvider) ([]schemas.Key, error)
type Config ¶
type Config struct {
// Embedding Model settings - REQUIRED for semantic caching
Provider schemas.ModelProvider `json:"provider"`
Keys []schemas.Key `json:"keys"`
EmbeddingModel string `json:"embedding_model,omitempty"` // Model to use for generating embeddings (optional)
// Plugin behavior settings
CleanUpOnShutdown bool `json:"cleanup_on_shutdown,omitempty"` // Clean up cache on shutdown (default: false)
TTL time.Duration `json:"ttl,omitempty"` // Time-to-live for cached responses (default: 5min)
Threshold float64 `json:"threshold,omitempty"` // Cosine similarity threshold for semantic matching (default: 0.8)
VectorStoreNamespace string `json:"vector_store_namespace,omitempty"` // Namespace for vector store (optional)
Dimension int `json:"dimension"` // Dimension for vector store
// Advanced caching behavior
ConversationHistoryThreshold int `json:"conversation_history_threshold,omitempty"` // Skip caching for requests with more than this number of messages in the conversation history (default: 3)
CacheByModel *bool `json:"cache_by_model,omitempty"` // Include model in cache key (default: true)
CacheByProvider *bool `json:"cache_by_provider,omitempty"` // Include provider in cache key (default: true)
ExcludeSystemPrompt *bool `json:"exclude_system_prompt,omitempty"` // Exclude system prompt from cache key (default: false)
}
Config contains configuration for the semantic cache plugin. The VectorStore abstraction handles the underlying storage implementation and its defaults. Only specify values you want to override from the semantic cache defaults.
func (*Config) UnmarshalJSON ¶
UnmarshalJSON implements custom JSON unmarshaling for the semantic cache Config. It accepts the TTL either as a duration string (e.g. "1m", "1h") or as a numeric value in seconds.
type Plugin ¶
type Plugin struct {
// contains filtered or unexported fields
}
Plugin implements the schemas.LLMPlugin interface for semantic caching. It caches responses using a two-tier approach: direct hash matching for exact requests and semantic similarity search for related content. The plugin supports configurable caching behavior via the VectorStore abstraction, including TTL management and streaming response handling.
Fields:
- store: VectorStore instance for semantic cache operations
- config: Plugin configuration including semantic cache and caching settings
- logger: Logger instance for plugin operations
func (*Plugin) Cleanup ¶
Cleanup performs cleanup operations for the semantic cache plugin. It removes all cached entries created by this plugin from the VectorStore only if CleanUpOnShutdown is true. Identifies cache entries by the presence of semantic cache-specific fields (request_hash, cache_key).
The function performs the following operations:
1. Checks if cleanup is enabled via the CleanUpOnShutdown config
2. Retrieves all entries and filters client-side to identify cache entries
3. Deletes all matching cache entries from the VectorStore in batches
This method should be called when shutting down the application to ensure proper resource cleanup if configured to do so.
Returns:
- error: Any error that occurred during cleanup operations
func (*Plugin) ClearCacheForKey ¶
ClearCacheForKey deletes cache entries for a specific cache key. Uses the unified VectorStore interface for deletion of all entries with the given cache key.
Parameters:
- cacheKey: The specific cache key to delete
Returns:
- error: Any error that occurred during cache key deletion
func (*Plugin) ClearCacheForRequestID ¶
ClearCacheForRequestID deletes the cache entry for a specific request ID. Uses the unified VectorStore interface to delete the single entry by its UUID.
Parameters:
- requestID: The UUID-based request ID to delete cache entries for
Returns:
- error: Any error that occurred during cache entry deletion
func (*Plugin) GetName ¶
GetName returns the canonical name of the semantic cache plugin. This name is used for plugin identification and logging purposes.
Returns:
- string: The plugin name for semantic cache
func (*Plugin) HTTPTransportPostHook ¶ added in v1.4.9
func (plugin *Plugin) HTTPTransportPostHook(ctx *schemas.BifrostContext, req *schemas.HTTPRequest, resp *schemas.HTTPResponse) error
HTTPTransportPostHook is not used for this plugin
func (*Plugin) HTTPTransportPreHook ¶ added in v1.4.9
func (plugin *Plugin) HTTPTransportPreHook(ctx *schemas.BifrostContext, req *schemas.HTTPRequest) (*schemas.HTTPResponse, error)
HTTPTransportPreHook is not used for this plugin
func (*Plugin) HTTPTransportStreamChunkHook ¶ added in v1.4.15
func (plugin *Plugin) HTTPTransportStreamChunkHook(ctx *schemas.BifrostContext, req *schemas.HTTPRequest, chunk *schemas.BifrostStreamChunk) (*schemas.BifrostStreamChunk, error)
HTTPTransportStreamChunkHook passes through streaming chunks unchanged
func (*Plugin) PostLLMHook ¶ added in v1.4.16
func (plugin *Plugin) PostLLMHook(ctx *schemas.BifrostContext, res *schemas.BifrostResponse, bifrostErr *schemas.BifrostError) (*schemas.BifrostResponse, *schemas.BifrostError, error)
PostLLMHook is called after a response is received from a provider. It caches responses in the VectorStore using UUID-based keys with unified metadata structure including provider, model, request hash, and TTL. Handles both single and streaming responses.
The function performs the following operations:
1. Checks configurable caching behavior and skips caching for unsuccessful responses if configured
2. Retrieves the request hash and ID from the context (set during PreLLMHook)
3. Marshals the response for storage
4. Stores the unified cache entry in the VectorStore asynchronously (non-blocking)
The VectorStore Add operation runs in a separate goroutine to avoid blocking the response. The function gracefully handles errors and continues without caching if any step fails, ensuring that response processing is never interrupted by caching issues.
Parameters:
- ctx: Pointer to the schemas.BifrostContext containing the request hash and ID
- res: The response from the provider to be cached
- bifrostErr: The error from the provider, if any (used for success determination)
Returns:
- *schemas.BifrostResponse: The original response, unmodified
- *schemas.BifrostError: The original error, unmodified
- error: Any error that occurred during caching preparation (always nil as errors are handled gracefully)
func (*Plugin) PreLLMHook ¶ added in v1.4.16
func (plugin *Plugin) PreLLMHook(ctx *schemas.BifrostContext, req *schemas.BifrostRequest) (*schemas.BifrostRequest, *schemas.LLMPluginShortCircuit, error)
PreLLMHook is called before a request is processed by Bifrost. It performs a two-stage cache lookup: first direct hash matching, then semantic similarity search. Uses UUID-based keys for entries stored in the VectorStore.
Parameters:
- ctx: Pointer to the schemas.BifrostContext
- req: The incoming Bifrost request
Returns:
- *schemas.BifrostRequest: The original request
- *schemas.LLMPluginShortCircuit: Short-circuit result carrying the cached response if found, nil otherwise
- error: Any error that occurred during cache lookup
func (*Plugin) WaitForPendingOperations ¶ added in v1.4.19
func (plugin *Plugin) WaitForPendingOperations()
WaitForPendingOperations blocks until all pending cache operations (goroutines) complete. This is useful in tests to ensure cache entries are stored before checking for cache hits.
type PluginAccount ¶
type PluginAccount struct {
// contains filtered or unexported fields
}
func (*PluginAccount) GetConfigForProvider ¶
func (pa *PluginAccount) GetConfigForProvider(providerKey schemas.ModelProvider) (*schemas.ProviderConfig, error)
func (*PluginAccount) GetConfiguredProviders ¶
func (pa *PluginAccount) GetConfiguredProviders() ([]schemas.ModelProvider, error)
func (*PluginAccount) GetKeysForProvider ¶
func (pa *PluginAccount) GetKeysForProvider(ctx context.Context, providerKey schemas.ModelProvider) ([]schemas.Key, error)
type RetryConfig ¶ added in v1.3.0
RetryConfig defines retry configuration for API requests
func DefaultRetryConfig ¶ added in v1.3.0
func DefaultRetryConfig() RetryConfig
DefaultRetryConfig returns the default retry configuration
type StreamAccumulator ¶
type StreamAccumulator struct {
RequestID string // The request ID
Chunks []*StreamChunk // All chunks for this stream
IsComplete bool // Whether the stream is complete
HasError bool // Whether any chunk in the stream had an error
FinalTimestamp time.Time // When the stream completed
Embedding []float32 // Embedding for the original request
Metadata map[string]interface{} // Metadata for caching
TTL time.Duration // TTL for this cache entry
// contains filtered or unexported fields
}
StreamAccumulator manages accumulation of streaming chunks for caching
type StreamChunk ¶
type StreamChunk struct {
Timestamp time.Time // When chunk was received
Response *schemas.BifrostResponse // The actual response chunk
FinishReason *string // If this is the final chunk
}
StreamChunk represents a single chunk from a streaming response
type TestSetup ¶
type TestSetup struct {
Logger schemas.Logger
Store vectorstore.VectorStore
Plugin schemas.LLMPlugin
Client *bifrost.Bifrost
Config *Config
}
TestSetup contains common test setup components
func CreateTestSetupWithConversationThreshold ¶ added in v1.2.6
CreateTestSetupWithConversationThreshold creates a test setup with custom conversation history threshold
func CreateTestSetupWithExcludeSystemPrompt ¶ added in v1.2.6
CreateTestSetupWithExcludeSystemPrompt creates a test setup with ExcludeSystemPrompt setting
func CreateTestSetupWithThresholdAndExcludeSystem ¶ added in v1.2.6
func CreateTestSetupWithThresholdAndExcludeSystem(t *testing.T, threshold int, excludeSystem bool) *TestSetup
CreateTestSetupWithThresholdAndExcludeSystem creates a test setup with both conversation threshold and exclude system prompt settings
func NewTestSetup ¶
NewTestSetup creates a new test setup with default configuration
func NewTestSetupWithConfig ¶
NewTestSetupWithConfig creates a new test setup with custom configuration
func NewTestSetupWithVectorStore ¶ added in v1.4.15
func NewTestSetupWithVectorStore(t *testing.T, config *Config, storeType vectorstore.VectorStoreType) *TestSetup
NewTestSetupWithVectorStore creates a new test setup with custom configuration and vector store type