semanticcache

package module
v1.4.19 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 12, 2026 License: Apache-2.0 Imports: 19 Imported by: 3

Documentation

Overview

Package semanticcache provides a semantic caching plugin for Bifrost. The plugin caches responses using both direct hash matching (xxhash) and semantic similarity search (embeddings). It supports configurable caching behavior via the VectorStore abstraction, with TTL management and streaming response handling.

Index

Constants

View Source
const (
	PluginName                          string        = "semantic_cache"
	DefaultVectorStoreNamespace         string        = "BifrostSemanticCachePlugin"
	PluginLoggerPrefix                  string        = "[Semantic Cache]"
	CacheConnectionTimeout              time.Duration = 5 * time.Second
	CreateNamespaceTimeout              time.Duration = 30 * time.Second
	CacheSetTimeout                     time.Duration = 30 * time.Second
	DefaultCacheTTL                     time.Duration = 5 * time.Minute
	DefaultCacheThreshold               float64       = 0.8
	DefaultConversationHistoryThreshold int           = 3
)

Plugin constants

View Source
const (
	CacheKey          schemas.BifrostContextKey = "semantic_cache_key"        // To set the cache key for a request - REQUIRED for all requests
	CacheTTLKey       schemas.BifrostContextKey = "semantic_cache_ttl"        // To explicitly set the TTL for a request
	CacheThresholdKey schemas.BifrostContextKey = "semantic_cache_threshold"  // To explicitly set the threshold for a request
	CacheTypeKey      schemas.BifrostContextKey = "semantic_cache_cache_type" // To explicitly set the cache type for a request
	CacheNoStoreKey   schemas.BifrostContextKey = "semantic_cache_no_store"   // To explicitly disable storing the response in the cache

)

Variables

Dependencies is a list of dependencies that the plugin requires.

View Source
var SelectFields = []string{"request_hash", "response", "stream_chunks", "expires_at", "cache_key", "provider", "model"}
View Source
var VectorStoreProperties = map[string]vectorstore.VectorStoreProperties{
	"request_hash": {
		DataType:    vectorstore.VectorStorePropertyTypeString,
		Description: "The hash of the request",
	},
	"response": {
		DataType:    vectorstore.VectorStorePropertyTypeString,
		Description: "The response from the provider",
	},
	"stream_chunks": {
		DataType:    vectorstore.VectorStorePropertyTypeStringArray,
		Description: "The stream chunks from the provider",
	},
	"expires_at": {
		DataType:    vectorstore.VectorStorePropertyTypeInteger,
		Description: "The expiration time of the cache entry",
	},
	"cache_key": {
		DataType:    vectorstore.VectorStorePropertyTypeString,
		Description: "The cache key from the request",
	},
	"provider": {
		DataType:    vectorstore.VectorStorePropertyTypeString,
		Description: "The provider used for the request",
	},
	"model": {
		DataType:    vectorstore.VectorStorePropertyTypeString,
		Description: "The model used for the request",
	},
	"params_hash": {
		DataType:    vectorstore.VectorStorePropertyTypeString,
		Description: "The hash of the parameters used for the request",
	},
	"from_bifrost_semantic_cache_plugin": {
		DataType:    vectorstore.VectorStorePropertyTypeBoolean,
		Description: "Whether the cache entry was created by the BifrostSemanticCachePlugin",
	},
}

Functions

func AddUserMessage added in v1.2.6

func AddUserMessage(messages []schemas.ChatMessage, userMessage string) []schemas.ChatMessage

AddUserMessage adds a user message to existing conversation

func AssertCacheHit

func AssertCacheHit(t *testing.T, response *schemas.BifrostResponse, expectedCacheType string)

AssertCacheHit verifies that a response was served from cache

func AssertNoCacheHit

func AssertNoCacheHit(t *testing.T, response *schemas.BifrostResponse)

AssertNoCacheHit verifies that a response was NOT served from cache

func BuildConversationHistory added in v1.2.6

func BuildConversationHistory(systemPrompt string, userAssistantPairs ...[]string) []schemas.ChatMessage

BuildConversationHistory creates a conversation history from pairs of user/assistant messages

func CreateBasicChatRequest

func CreateBasicChatRequest(content string, temperature float64, maxTokens int) *schemas.BifrostChatRequest

CreateBasicChatRequest creates a basic chat completion request for testing

func CreateBasicResponsesRequest added in v1.3.0

func CreateBasicResponsesRequest(content string, temperature float64, maxTokens int) *schemas.BifrostResponsesRequest

CreateBasicResponsesRequest creates a basic Responses API request for testing

func CreateContextWithCacheKey

func CreateContextWithCacheKey(value string) *schemas.BifrostContext

CreateContextWithCacheKey creates a context with the test cache key

func CreateContextWithCacheKeyAndNoStore added in v1.2.6

func CreateContextWithCacheKeyAndNoStore(value string, noStore bool) *schemas.BifrostContext

CreateContextWithCacheKeyAndNoStore creates a context with cache key and no-store flag

func CreateContextWithCacheKeyAndTTL added in v1.2.6

func CreateContextWithCacheKeyAndTTL(value string, ttl time.Duration) *schemas.BifrostContext

CreateContextWithCacheKeyAndTTL creates a context with cache key and custom TTL

func CreateContextWithCacheKeyAndThreshold added in v1.2.6

func CreateContextWithCacheKeyAndThreshold(value string, threshold float64) *schemas.BifrostContext

CreateContextWithCacheKeyAndThreshold creates a context with cache key and custom threshold

func CreateContextWithCacheKeyAndType added in v1.2.6

func CreateContextWithCacheKeyAndType(value string, cacheType CacheType) *schemas.BifrostContext

CreateContextWithCacheKeyAndType creates a context with cache key and cache type

func CreateConversationRequest added in v1.2.6

func CreateConversationRequest(messages []schemas.ChatMessage, temperature float64, maxTokens int) *schemas.BifrostChatRequest

CreateConversationRequest creates a chat request with conversation history

func CreateEmbeddingRequest added in v1.2.6

func CreateEmbeddingRequest(texts []string) *schemas.BifrostEmbeddingRequest

CreateEmbeddingRequest creates an embedding request for testing

func CreateImageGenerationRequest added in v1.4.9

func CreateImageGenerationRequest(prompt string, size string, quality string) *schemas.BifrostImageGenerationRequest

CreateImageGenerationRequest creates an image generation request for testing

func CreateResponsesRequestWithInstructions added in v1.3.0

func CreateResponsesRequestWithInstructions(content string, instructions string, temperature float64, maxTokens int) *schemas.BifrostResponsesRequest

CreateResponsesRequestWithInstructions creates a Responses API request with system instructions

func CreateResponsesRequestWithTools added in v1.3.0

func CreateResponsesRequestWithTools(content string, temperature float64, maxTokens int, tools []schemas.ResponsesTool) *schemas.BifrostResponsesRequest

CreateResponsesRequestWithTools creates a Responses API request with tools for testing

func CreateSpeechRequest

func CreateSpeechRequest(input string, voice string) *schemas.BifrostSpeechRequest

CreateSpeechRequest creates a speech synthesis request for testing

func CreateStreamingChatRequest

func CreateStreamingChatRequest(content string, temperature float64, maxTokens int) *schemas.BifrostChatRequest

CreateStreamingChatRequest creates a streaming chat completion request for testing

func CreateStreamingResponsesRequest added in v1.3.0

func CreateStreamingResponsesRequest(content string, temperature float64, maxTokens int) *schemas.BifrostResponsesRequest

CreateStreamingResponsesRequest creates a streaming Responses API request for testing

func Init

func Init(ctx context.Context, config *Config, logger schemas.Logger, store vectorstore.VectorStore) (schemas.LLMPlugin, error)

Init creates a new semantic cache plugin instance with the provided configuration. It uses the VectorStore abstraction for cache operations and returns a configured plugin.

The VectorStore handles the underlying storage implementation and its defaults. The plugin only sets defaults for its own behavior (TTL, cache key generation, etc.).

Parameters:

  • config: Semantic cache and plugin configuration (the cache key itself is supplied per request via the CacheKey context key)
  • logger: Logger instance for the plugin
  • store: VectorStore instance for cache operations

Returns:

  • schemas.LLMPlugin: A configured semantic cache plugin instance
  • error: Any error that occurred during plugin initialization

func WaitForCache

func WaitForCache()

WaitForCache waits for async cache operations to complete

Types

type BaseAccount

type BaseAccount struct{}

BaseAccount implements the schemas.Account interface for testing purposes.

func (*BaseAccount) GetConfigForProvider

func (baseAccount *BaseAccount) GetConfigForProvider(providerKey schemas.ModelProvider) (*schemas.ProviderConfig, error)

func (*BaseAccount) GetConfiguredProviders

func (baseAccount *BaseAccount) GetConfiguredProviders() ([]schemas.ModelProvider, error)

func (*BaseAccount) GetKeysForProvider

func (baseAccount *BaseAccount) GetKeysForProvider(ctx context.Context, providerKey schemas.ModelProvider) ([]schemas.Key, error)

type CacheType

type CacheType string
const (
	CacheTypeDirect   CacheType = "direct"
	CacheTypeSemantic CacheType = "semantic"
)

type Config

type Config struct {
	// Embedding Model settings - REQUIRED for semantic caching
	Provider       schemas.ModelProvider `json:"provider"`
	Keys           []schemas.Key         `json:"keys"`
	EmbeddingModel string                `json:"embedding_model,omitempty"` // Model to use for generating embeddings (optional)

	// Plugin behavior settings
	CleanUpOnShutdown    bool          `json:"cleanup_on_shutdown,omitempty"`    // Clean up cache on shutdown (default: false)
	TTL                  time.Duration `json:"ttl,omitempty"`                    // Time-to-live for cached responses (default: 5min)
	Threshold            float64       `json:"threshold,omitempty"`              // Cosine similarity threshold for semantic matching (default: 0.8)
	VectorStoreNamespace string        `json:"vector_store_namespace,omitempty"` // Namespace for vector store (optional)
	Dimension            int           `json:"dimension"`                        // Dimension for vector store

	// Advanced caching behavior
	ConversationHistoryThreshold int   `json:"conversation_history_threshold,omitempty"` // Skip caching for requests with more than this number of messages in the conversation history (default: 3)
	CacheByModel                 *bool `json:"cache_by_model,omitempty"`                 // Include model in cache key (default: true)
	CacheByProvider              *bool `json:"cache_by_provider,omitempty"`              // Include provider in cache key (default: true)
	ExcludeSystemPrompt          *bool `json:"exclude_system_prompt,omitempty"`          // Exclude system prompt in cache key (default: false)
}

Config contains configuration for the semantic cache plugin. The VectorStore abstraction handles the underlying storage implementation and its defaults. Only specify values you want to override from the semantic cache defaults.

func (*Config) UnmarshalJSON

func (c *Config) UnmarshalJSON(data []byte) error

UnmarshalJSON implements custom JSON unmarshaling for the semantic cache Config. It accepts the TTL either as a duration string ("1m", "1h") or as a numeric value in seconds.

type Plugin

type Plugin struct {
	// contains filtered or unexported fields
}

Plugin implements the schemas.LLMPlugin interface for semantic caching. It caches responses using a two-tier approach: direct hash matching for exact requests and semantic similarity search for related content. The plugin supports configurable caching behavior via the VectorStore abstraction, including TTL management and streaming response handling.

Fields:

  • store: VectorStore instance for semantic cache operations
  • config: Plugin configuration including semantic cache and caching settings
  • logger: Logger instance for plugin operations

func (*Plugin) Cleanup

func (plugin *Plugin) Cleanup() error

Cleanup performs cleanup operations for the semantic cache plugin. It removes all cached entries created by this plugin from the VectorStore only if CleanUpOnShutdown is true. Identifies cache entries by the presence of semantic cache-specific fields (request_hash, cache_key).

The function performs the following operations:

  1. Checks if cleanup is enabled via the CleanUpOnShutdown config
  2. Retrieves all entries and filters client-side to identify cache entries
  3. Deletes all matching cache entries from the VectorStore in batches

This method should be called when shutting down the application to ensure proper resource cleanup if configured to do so.

Returns:

  • error: Any error that occurred during cleanup operations

func (*Plugin) ClearCacheForKey

func (plugin *Plugin) ClearCacheForKey(cacheKey string) error

ClearCacheForKey deletes cache entries for a specific cache key. Uses the unified VectorStore interface for deletion of all entries with the given cache key.

Parameters:

  • cacheKey: The specific cache key to delete

Returns:

  • error: Any error that occurred during cache key deletion

func (*Plugin) ClearCacheForRequestID

func (plugin *Plugin) ClearCacheForRequestID(requestID string) error

ClearCacheForRequestID deletes cache entries for a specific request ID. Uses the unified VectorStore interface to delete the single entry by its UUID.

Parameters:

  • requestID: The UUID-based request ID to delete cache entries for

Returns:

  • error: Any error that occurred while deleting the cache entry

func (*Plugin) GetName

func (plugin *Plugin) GetName() string

GetName returns the canonical name of the semantic cache plugin. This name is used for plugin identification and logging purposes.

Returns:

  • string: The plugin name for semantic cache

func (*Plugin) HTTPTransportPostHook added in v1.4.9

func (plugin *Plugin) HTTPTransportPostHook(ctx *schemas.BifrostContext, req *schemas.HTTPRequest, resp *schemas.HTTPResponse) error

HTTPTransportPostHook is not used for this plugin

func (*Plugin) HTTPTransportPreHook added in v1.4.9

func (plugin *Plugin) HTTPTransportPreHook(ctx *schemas.BifrostContext, req *schemas.HTTPRequest) (*schemas.HTTPResponse, error)

HTTPTransportPreHook is not used for this plugin

func (*Plugin) HTTPTransportStreamChunkHook added in v1.4.15

func (plugin *Plugin) HTTPTransportStreamChunkHook(ctx *schemas.BifrostContext, req *schemas.HTTPRequest, chunk *schemas.BifrostStreamChunk) (*schemas.BifrostStreamChunk, error)

HTTPTransportStreamChunkHook passes through streaming chunks unchanged

func (*Plugin) PostLLMHook added in v1.4.16

PostLLMHook is called after a response is received from a provider. It caches responses in the VectorStore using UUID-based keys with unified metadata structure including provider, model, request hash, and TTL. Handles both single and streaming responses.

The function performs the following operations:

  1. Checks configurable caching behavior and skips caching for unsuccessful responses if configured
  2. Retrieves the request hash and ID from the context (set during PreLLMHook)
  3. Marshals the response for storage
  4. Stores the unified cache entry in the VectorStore asynchronously (non-blocking)

The VectorStore Add operation runs in a separate goroutine to avoid blocking the response. The function gracefully handles errors and continues without caching if any step fails, ensuring that response processing is never interrupted by caching issues.

Parameters:

  • ctx: Pointer to the schemas.BifrostContext containing the request hash and ID
  • res: The response from the provider to be cached
  • bifrostErr: The error from the provider, if any (used for success determination)

Returns:

  • *schemas.BifrostResponse: The original response, unmodified
  • *schemas.BifrostError: The original error, unmodified
  • error: Any error that occurred during caching preparation (always nil as errors are handled gracefully)

func (*Plugin) PreLLMHook added in v1.4.16

PreLLMHook is called before a request is processed by Bifrost. It performs a two-stage cache lookup: first direct hash matching, then semantic similarity search. Uses UUID-based keys for entries stored in the VectorStore.

Parameters:

  • ctx: Pointer to the schemas.BifrostContext
  • req: The incoming Bifrost request

Returns:

  • *schemas.BifrostRequest: The original request
  • *schemas.BifrostResponse: Cached response if found, nil otherwise
  • error: Any error that occurred during cache lookup

func (*Plugin) WaitForPendingOperations added in v1.4.19

func (plugin *Plugin) WaitForPendingOperations()

WaitForPendingOperations blocks until all pending cache operations (goroutines) complete. This is useful in tests to ensure cache entries are stored before checking for cache hits.

type PluginAccount

type PluginAccount struct {
	// contains filtered or unexported fields
}

func (*PluginAccount) GetConfigForProvider

func (pa *PluginAccount) GetConfigForProvider(providerKey schemas.ModelProvider) (*schemas.ProviderConfig, error)

func (*PluginAccount) GetConfiguredProviders

func (pa *PluginAccount) GetConfiguredProviders() ([]schemas.ModelProvider, error)

func (*PluginAccount) GetKeysForProvider

func (pa *PluginAccount) GetKeysForProvider(ctx context.Context, providerKey schemas.ModelProvider) ([]schemas.Key, error)

type RetryConfig added in v1.3.0

type RetryConfig struct {
	MaxRetries int
	BaseDelay  time.Duration
}

RetryConfig defines retry configuration for API requests

func DefaultRetryConfig added in v1.3.0

func DefaultRetryConfig() RetryConfig

DefaultRetryConfig returns the default retry configuration

type StreamAccumulator

type StreamAccumulator struct {
	RequestID      string                 // The request ID
	Chunks         []*StreamChunk         // All chunks for this stream
	IsComplete     bool                   // Whether the stream is complete
	HasError       bool                   // Whether any chunk in the stream had an error
	FinalTimestamp time.Time              // When the stream completed
	Embedding      []float32              // Embedding for the original request
	Metadata       map[string]interface{} // Metadata for caching
	TTL            time.Duration          // TTL for this cache entry
	// contains filtered or unexported fields
}

StreamAccumulator manages accumulation of streaming chunks for caching

type StreamChunk

type StreamChunk struct {
	Timestamp    time.Time                // When chunk was received
	Response     *schemas.BifrostResponse // The actual response chunk
	FinishReason *string                  // If this is the final chunk
}

StreamChunk represents a single chunk from a streaming response

type TestSetup

type TestSetup struct {
	Logger schemas.Logger
	Store  vectorstore.VectorStore
	Plugin schemas.LLMPlugin
	Client *bifrost.Bifrost
	Config *Config
}

TestSetup contains common test setup components

func CreateTestSetupWithConversationThreshold added in v1.2.6

func CreateTestSetupWithConversationThreshold(t *testing.T, threshold int) *TestSetup

CreateTestSetupWithConversationThreshold creates a test setup with custom conversation history threshold

func CreateTestSetupWithExcludeSystemPrompt added in v1.2.6

func CreateTestSetupWithExcludeSystemPrompt(t *testing.T, excludeSystem bool) *TestSetup

CreateTestSetupWithExcludeSystemPrompt creates a test setup with ExcludeSystemPrompt setting

func CreateTestSetupWithThresholdAndExcludeSystem added in v1.2.6

func CreateTestSetupWithThresholdAndExcludeSystem(t *testing.T, threshold int, excludeSystem bool) *TestSetup

CreateTestSetupWithThresholdAndExcludeSystem creates a test setup with both conversation threshold and exclude system prompt settings

func NewTestSetup

func NewTestSetup(t *testing.T) *TestSetup

NewTestSetup creates a new test setup with default configuration

func NewTestSetupWithConfig

func NewTestSetupWithConfig(t *testing.T, config *Config) *TestSetup

NewTestSetupWithConfig creates a new test setup with custom configuration

func NewTestSetupWithVectorStore added in v1.4.15

func NewTestSetupWithVectorStore(t *testing.T, config *Config, storeType vectorstore.VectorStoreType) *TestSetup

NewTestSetupWithVectorStore creates a new test setup with custom configuration and vector store type

func (*TestSetup) Cleanup

func (ts *TestSetup) Cleanup()

Cleanup cleans up test resources

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL