Documentation
¶
Overview ¶
Package embedder provides text embedding generation services for semantic (vector) search.
Index ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type CohereConfig ¶
// CohereConfig carries the settings for the Cohere embedder.
type CohereConfig struct {
	// APIKey is the Cohere API key. Required.
	APIKey string

	// BaseURL is the API base URL.
	// Default: https://api.cohere.com
	BaseURL string

	// Model is the embedding model name.
	// Default: embed-english-v3.0
	// Supported models include embed-english-v3.0, embed-multilingual-v3.0
	// and embed-v4.0, among others.
	Model string

	// Dimension is the embedding dimension. When 0, it is auto-detected
	// from the model.
	Dimension int

	// Timeout applies to API requests.
	// Default: 30s
	Timeout time.Duration

	// BatchSize is the batch size used for batch embedding.
	// Default: 96, which is Cohere's maximum per request.
	BatchSize int

	// InputType states what kind of input is being embedded; v3+ models
	// require it.
	// Allowed: "search_document", "search_query", "classification", "clustering"
	// Default: "search_document"
	InputType string

	// OutputDimension is an optional output size for v4+ models.
	// Allowed: 256, 512, 1024, 1536
	// When set, it overrides the model's default dimension.
	OutputDimension *int

	// Truncate selects how inputs longer than the maximum token count are
	// handled.
	// Allowed: "NONE", "START", "END"
	// Default: "END"
	Truncate string
}
CohereConfig configures the Cohere embedder.
type CohereEmbedder ¶
// CohereEmbedder is the Embedder implementation backed by Cohere's v2
// embeddings API. Its fields are not part of the exported API.
type CohereEmbedder struct {
// contains filtered or unexported fields
}
CohereEmbedder implements Embedder using Cohere's v2 embeddings API.
Ported from legacy pkg/embedders/cohere.go and updated for Cohere API v2. See: https://docs.cohere.com/reference/embed
func NewCohereEmbedder ¶
func NewCohereEmbedder(cfg CohereConfig) (*CohereEmbedder, error)
NewCohereEmbedder creates a new Cohere embedder.
func (*CohereEmbedder) Dimension ¶
func (e *CohereEmbedder) Dimension() int
Dimension returns the embedding vector dimension.
func (*CohereEmbedder) EmbedBatch ¶
func (e *CohereEmbedder) EmbedBatch(ctx context.Context, texts []string) ([][]float32, error)
EmbedBatch converts multiple texts to vector embeddings.
func (*CohereEmbedder) Model ¶
func (e *CohereEmbedder) Model() string
Model returns the model name being used.
type Embedder ¶
// Embedder turns text into vector embeddings.
type Embedder interface {
	// Embed returns the vector embedding for a single text.
	Embed(ctx context.Context, text string) ([]float32, error)

	// EmbedBatch returns vector embeddings for several texts. It is more
	// efficient than calling Embed repeatedly.
	EmbedBatch(ctx context.Context, texts []string) ([][]float32, error)

	// Dimension reports the dimension of the embedding vectors.
	Dimension() int

	// Model reports the name of the model in use.
	Model() string

	// Close releases any resources the embedder holds.
	Close() error
}
Embedder produces vector embeddings from text.
Embeddings are used by IndexService for semantic similarity search. Different providers (OpenAI, Ollama, Cohere) implement this interface.
type OllamaConfig ¶
// OllamaConfig carries the settings for the Ollama embedder.
type OllamaConfig struct {
	// BaseURL is the Ollama API base URL.
	// Default: http://localhost:11434
	BaseURL string

	// Model is the embedding model name.
	// Default: nomic-embed-text
	Model string

	// Dimension is the embedding dimension.
	// Default: 768, the value for nomic-embed-text.
	Dimension int

	// Timeout applies to API requests.
	// Default: 30s
	Timeout time.Duration
}
OllamaConfig configures the Ollama embedder.
type OllamaEmbedder ¶
// OllamaEmbedder is the Embedder implementation backed by Ollama's
// embeddings API. Its fields are not part of the exported API.
type OllamaEmbedder struct {
// contains filtered or unexported fields
}
OllamaEmbedder implements Embedder using Ollama's embeddings API.
Ported from legacy pkg/embedders/ollama.go.
func NewOllamaEmbedder ¶
func NewOllamaEmbedder(cfg OllamaConfig) (*OllamaEmbedder, error)
NewOllamaEmbedder creates a new Ollama embedder.
func (*OllamaEmbedder) Dimension ¶
func (e *OllamaEmbedder) Dimension() int
Dimension returns the embedding vector dimension.
func (*OllamaEmbedder) EmbedBatch ¶
func (e *OllamaEmbedder) EmbedBatch(ctx context.Context, texts []string) ([][]float32, error)
EmbedBatch converts multiple texts to vector embeddings. The Ollama API supports batch processing via array input.
func (*OllamaEmbedder) Model ¶
func (e *OllamaEmbedder) Model() string
Model returns the model name being used.
type OpenAIConfig ¶
// OpenAIConfig carries the settings for the OpenAI embedder.
type OpenAIConfig struct {
	// APIKey is the OpenAI API key. Required.
	APIKey string

	// BaseURL is the API base URL.
	// Default: https://api.openai.com/v1
	BaseURL string

	// Model is the embedding model name.
	// Default: text-embedding-3-small
	Model string

	// Dimension is the embedding dimension. When 0, it is auto-detected
	// from the model. With text-embedding-3 models it maps to the
	// 'dimensions' API parameter.
	Dimension int

	// Timeout applies to API requests.
	// Default: 30s
	Timeout time.Duration

	// BatchSize is the batch size used for batch embedding.
	// Default: 100. OpenAI supports up to 2048 inputs per request; the
	// smaller default is used to stay within the token limit
	// (300,000 tokens total per request).
	BatchSize int

	// EncodingFormat selects the format in which embeddings are returned.
	// Allowed: "float" (default), "base64"
	EncodingFormat string

	// User is a unique identifier representing the end user. It can help
	// OpenAI monitor and detect abuse.
	User string
}
OpenAIConfig configures the OpenAI embedder.
type OpenAIEmbedder ¶
// OpenAIEmbedder is the Embedder implementation backed by OpenAI's
// embeddings API. Its fields are not part of the exported API.
type OpenAIEmbedder struct {
// contains filtered or unexported fields
}
OpenAIEmbedder implements Embedder using OpenAI's embeddings API.
Ported from legacy pkg/embedders/openai.go.
func NewOpenAIEmbedder ¶
func NewOpenAIEmbedder(cfg OpenAIConfig) (*OpenAIEmbedder, error)
NewOpenAIEmbedder creates a new OpenAI embedder.
func (*OpenAIEmbedder) Dimension ¶
func (e *OpenAIEmbedder) Dimension() int
Dimension returns the embedding vector dimension.
func (*OpenAIEmbedder) EmbedBatch ¶
func (e *OpenAIEmbedder) EmbedBatch(ctx context.Context, texts []string) ([][]float32, error)
EmbedBatch converts multiple texts to vector embeddings.
func (*OpenAIEmbedder) Model ¶
func (e *OpenAIEmbedder) Model() string
Model returns the model name being used.