Documentation
¶
Index ¶
- func EmbedText(ctx context.Context, e Embedder, texts []string) ([][]float32, error)
- func ExtractText(contents [][]ai.ContentPart) []string
- type Embedder
- type EmbedderCapabilities
- func TextOnlyCapabilities() EmbedderCapabilities
- func (c EmbedderCapabilities) GetMIMETypeSupport(mimeType string) (MIMETypeSupport, bool)
- func (c EmbedderCapabilities) IsMultimodal() bool
- func (c EmbedderCapabilities) IsTextOnly() bool
- func (c EmbedderCapabilities) SupportsMIMEType(mimeType string) bool
- func (c EmbedderCapabilities) SupportsModality(prefix string) bool
- type MIMETypeSupport
- type SparseEmbedder
- type SparseVector
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func ExtractText ¶
func ExtractText(contents [][]ai.ContentPart) []string
ExtractText extracts text from ContentPart slices for text-only embedders. It prefers TextContent, but falls back to using the ImageURLContent URL as text if no text is found.
Types ¶
type Embedder ¶
type Embedder interface {
// Capabilities reports what this embedder supports (MIME types, dimensions, etc.).
Capabilities() EmbedderCapabilities
// Embed generates embeddings for content.
// Each []ai.ContentPart in contents represents one document (text, image, mixed, etc.).
// It returns one embedding vector per input document.
Embed(ctx context.Context, contents [][]ai.ContentPart) ([][]float32, error)
}
Embedder is the core interface for generating embeddings.
type EmbedderCapabilities ¶
type EmbedderCapabilities struct {
// SupportedMIMETypes lists every MIME type this embedder can process.
// Text embedders should include "text/plain".
SupportedMIMETypes []MIMETypeSupport `json:"supported_mime_types,omitempty"`
// Dimensions lists the available output dimensions (empty = fixed/unknown).
Dimensions []int `json:"dimensions,omitempty"`
// DefaultDimension is the default output dimension (0 = unknown).
DefaultDimension int `json:"default_dimension,omitempty"`
// MaxBatchSize is the maximum number of items per request (0 = unlimited/unknown).
MaxBatchSize int `json:"max_batch_size,omitempty"`
// SupportsFusion indicates whether mixed content (text+image) can be
// fused into a single embedding vector.
SupportsFusion bool `json:"supports_fusion,omitempty"`
// SupportsURLs indicates whether the embedder can fetch content from URLs directly.
SupportsURLs bool `json:"supports_urls,omitempty"`
}
EmbedderCapabilities describes what an embedder supports.
func TextOnlyCapabilities ¶
func TextOnlyCapabilities() EmbedderCapabilities
TextOnlyCapabilities returns a basic text-only capability set.
func (EmbedderCapabilities) GetMIMETypeSupport ¶
func (c EmbedderCapabilities) GetMIMETypeSupport(mimeType string) (MIMETypeSupport, bool)
GetMIMETypeSupport returns the support details for a specific MIME type, if supported.
func (EmbedderCapabilities) IsMultimodal ¶
func (c EmbedderCapabilities) IsMultimodal() bool
IsMultimodal returns true if the embedder supports non-text content.
func (EmbedderCapabilities) IsTextOnly ¶
func (c EmbedderCapabilities) IsTextOnly() bool
IsTextOnly returns true if the embedder only supports text.
func (EmbedderCapabilities) SupportsMIMEType ¶
func (c EmbedderCapabilities) SupportsMIMEType(mimeType string) bool
SupportsMIMEType checks if a specific MIME type is supported. Supports exact matches and wildcard patterns like "image/*".
func (EmbedderCapabilities) SupportsModality ¶
func (c EmbedderCapabilities) SupportsModality(prefix string) bool
SupportsModality checks if the embedder supports a broad modality category. The prefix should be a MIME type prefix such as "image/", "audio/", "video/", or "text/".
type MIMETypeSupport ¶
type MIMETypeSupport struct {
// MIMEType is the MIME type, either exact (e.g., "text/plain", "image/png")
// or a wildcard pattern (e.g., "image/*").
MIMEType string `json:"mime_type"`
// MaxSizeBytes is the maximum file size in bytes (0 = unlimited/unknown).
MaxSizeBytes int64 `json:"max_size_bytes,omitempty"`
// MaxWidth is the maximum width for images/video (0 = unlimited/unknown).
MaxWidth int `json:"max_width,omitempty"`
// MaxHeight is the maximum height for images/video (0 = unlimited/unknown).
MaxHeight int `json:"max_height,omitempty"`
// MaxDurationSec is the maximum duration for audio/video, in seconds (0 = unlimited/unknown).
MaxDurationSec float64 `json:"max_duration_sec,omitempty"`
}
MIMETypeSupport describes support for a specific MIME type with optional constraints.
type SparseEmbedder ¶
type SparseEmbedder interface {
// SparseEmbed generates sparse embeddings for the given texts.
// It returns one SparseVector per input text.
SparseEmbed(ctx context.Context, texts []string) ([]SparseVector, error)
}
SparseEmbedder generates sparse (SPLADE-style) embeddings from text. Unlike the dense Embedder, which returns fixed-dimension float vectors, SparseEmbedder returns variable-length sparse vectors whose indices refer to positions in the model's vocabulary.
type SparseVector ¶
SparseVector represents a sparse embedding as parallel arrays of indices and values. Indices are token IDs from the model's vocabulary, sorted ascending. Values are the corresponding weights (always positive after SPLADE activation).