Documentation
¶
Index ¶
- Constants
- Variables
- type BulkIngestDocResult
- type BulkIngestDocument
- type BulkIngestOpts
- type BulkIngestResult
- type ChunkOutput
- type ChunkStrategy
- type Chunker
- type CreateCollectionOpts
- type DefaultChunker
- type Engine
- func (e *Engine) BulkIngest(ctx context.Context, namespace, collectionID string, ...) (*BulkIngestResult, error)
- func (e *Engine) CollectionStats(ctx context.Context, namespace, collectionID string) (*types.CollectionStats, error)
- func (e *Engine) CreateCollection(ctx context.Context, namespace string, opts CreateCollectionOpts) (*types.Collection, error)
- func (e *Engine) DeleteCollection(ctx context.Context, namespace, collectionID string) error
- func (e *Engine) DeleteDocument(ctx context.Context, namespace, docID string) error
- func (e *Engine) GetCollection(ctx context.Context, namespace, collectionID string) (*types.Collection, error)
- func (e *Engine) GetDocument(ctx context.Context, namespace, docID string) (*types.Document, error)
- func (e *Engine) Ingest(ctx context.Context, namespace, collectionID, content string, opts *IngestOpts) (*IngestResult, error)
- func (e *Engine) ListCollections(ctx context.Context, namespace, cursor string, limit int) ([]*types.Collection, string, error)
- func (e *Engine) Search(ctx context.Context, namespace, query string, opts *SearchOpts) (*SearchResult, error)
- func (e *Engine) SetExtractionEnqueuer(eq ExtractionEnqueuer)
- type ExtractionEnqueuer
- type IngestOpts
- type IngestResult
- type SearchMode
- type SearchOpts
- type SearchResult
- type SemanticChunker
- type SemanticChunkerOption
Constants ¶
const DefaultSimilarityThreshold = 0.5
DefaultSimilarityThreshold is the cosine similarity threshold for detecting breakpoints. When similarity between adjacent windows drops below this, a new chunk boundary is created.
const DefaultWindowSize = 3
DefaultWindowSize is the number of sentences to include in each embedding window.
Variables ¶
var (
ErrEmptyContent = errors.New("document content cannot be empty")
ErrCollectionNotFound = errors.New("collection not found")
ErrDocumentNotFound = errors.New("document not found")
ErrCollectionExists = errors.New("collection already exists")
ErrEmbeddingRequired = errors.New("embedding provider required for search")
ErrInvalidChunkConfig = errors.New("invalid chunk configuration")
)
Common errors returned by the knowledge engine.
Functions ¶
This section is empty.
Types ¶
type BulkIngestDocResult ¶
type BulkIngestDocResult struct {
Index int `json:"index"`
DocumentID string `json:"document_id,omitempty"`
Title string `json:"title,omitempty"`
ChunksCreated int `json:"chunks_created"`
Success bool `json:"success"`
Error string `json:"error,omitempty"`
}
BulkIngestDocResult contains the result for a single document.
type BulkIngestDocument ¶
type BulkIngestDocument struct {
Content string `json:"content"`
Title string `json:"title,omitempty"`
Source string `json:"source,omitempty"`
ContentType string `json:"content_type,omitempty"`
Metadata map[string]string `json:"metadata,omitempty"`
}
BulkIngestDocument represents a single document for bulk ingestion.
type BulkIngestOpts ¶
type BulkIngestOpts struct {
ChunkConfig *types.ChunkConfig // Override collection's default config
Concurrency int // Number of concurrent workers (0 = default 4)
OnProgress func(completed, total int, doc string) // Optional progress callback
ContinueOnError bool // Continue processing on individual document errors
}
BulkIngestOpts contains options for bulk document ingestion.
type BulkIngestResult ¶
type BulkIngestResult struct {
CollectionID string `json:"collection_id"`
TotalDocuments int `json:"total_documents"`
Succeeded int `json:"succeeded"`
Failed int `json:"failed"`
TotalChunks int `json:"total_chunks"`
Documents []*BulkIngestDocResult `json:"documents"`
}
BulkIngestResult contains the overall result of bulk ingestion.
type ChunkOutput ¶
ChunkOutput represents a single chunk produced by the chunker.
type ChunkStrategy ¶
type ChunkStrategy string
ChunkStrategy defines the available chunking strategies.
const (
ChunkStrategyFixed ChunkStrategy = "fixed"
ChunkStrategySentence ChunkStrategy = "sentence"
ChunkStrategyParagraph ChunkStrategy = "paragraph"
ChunkStrategySemantic ChunkStrategy = "semantic"
)
type Chunker ¶
type Chunker interface {
// Chunk splits content into chunks based on the configured strategy.
Chunk(content string, cfg types.ChunkConfig) []ChunkOutput
}
Chunker splits text into chunks for indexing.
type CreateCollectionOpts ¶
type CreateCollectionOpts struct {
Name string // Required: collection name
Description string // Optional description
ChunkConfig *types.ChunkConfig // Chunk configuration (uses default if nil)
}
CreateCollectionOpts contains options for creating a collection.
type DefaultChunker ¶
type DefaultChunker struct{}
DefaultChunker implements all chunking strategies.
func (*DefaultChunker) Chunk ¶
func (c *DefaultChunker) Chunk(content string, cfg types.ChunkConfig) []ChunkOutput
Chunk splits content using the specified strategy.
type Engine ¶
type Engine struct {
// contains filtered or unexported fields
}
Engine implements the knowledge store logic layer. It orchestrates chunking, embedding, and storage operations.
func NewEngine ¶
func NewEngine(store storage.Backend, emb embedding.Provider, cfg *config.KnowledgeConfig) (*Engine, error)
NewEngine creates a new knowledge engine.
func (*Engine) BulkIngest ¶
func (e *Engine) BulkIngest(ctx context.Context, namespace, collectionID string, documents []BulkIngestDocument, opts *BulkIngestOpts) (*BulkIngestResult, error)
BulkIngest ingests multiple documents into a collection. Documents are processed concurrently for efficiency, and progress is reported through the optional opts.OnProgress callback.
func (*Engine) CollectionStats ¶
func (e *Engine) CollectionStats(ctx context.Context, namespace, collectionID string) (*types.CollectionStats, error)
CollectionStats returns statistics for a collection.
func (*Engine) CreateCollection ¶
func (e *Engine) CreateCollection(ctx context.Context, namespace string, opts CreateCollectionOpts) (*types.Collection, error)
CreateCollection creates a new collection.
func (*Engine) DeleteCollection ¶
func (e *Engine) DeleteCollection(ctx context.Context, namespace, collectionID string) error
DeleteCollection removes a collection and all its documents.
func (*Engine) DeleteDocument ¶
func (e *Engine) DeleteDocument(ctx context.Context, namespace, docID string) error
DeleteDocument removes a document and all its chunks.
func (*Engine) GetCollection ¶
func (e *Engine) GetCollection(ctx context.Context, namespace, collectionID string) (*types.Collection, error)
GetCollection retrieves a collection by ID.
func (*Engine) GetDocument ¶
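func (e *Engine) GetDocument(ctx context.Context, namespace, docID string) (*types.Document, error)
GetDocument retrieves a document by ID.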
func (*Engine) Ingest ¶
func (e *Engine) Ingest(ctx context.Context, namespace, collectionID, content string, opts *IngestOpts) (*IngestResult, error)
Ingest adds a document to a collection, splitting it into chunks and generating embeddings for them.
func (*Engine) ListCollections ¶
func (e *Engine) ListCollections(ctx context.Context, namespace, cursor string, limit int) ([]*types.Collection, string, error)
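ListCollections returns one page of the collections in a namespace. Use cursor and limit to page through the results; the returned string is the cursor for the next page.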
func (*Engine) Search ¶
func (e *Engine) Search(ctx context.Context, namespace, query string, opts *SearchOpts) (*SearchResult, error)
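Search searches the knowledge in a namespace. The strategy is selected by opts.SearchMode: vector similarity (the default), full-text (BM25), or a hybrid that combines both with RRF.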
func (*Engine) SetExtractionEnqueuer ¶
func (e *Engine) SetExtractionEnqueuer(eq ExtractionEnqueuer)
SetExtractionEnqueuer sets the extraction enqueuer for entity extraction. When set, ingested chunks will be queued for background entity extraction.
type ExtractionEnqueuer ¶
type ExtractionEnqueuer interface {
EnqueueForExtraction(ctx context.Context, namespace, sourceType, sourceID, content string) error
}
ExtractionEnqueuer queues content for entity extraction.
type IngestOpts ¶
type IngestOpts struct {
Title string // Optional document title
Source string // Source URL or identifier
ContentType string // "text", "markdown", "html"
Metadata map[string]string // Optional metadata
ChunkConfig *types.ChunkConfig // Override collection's default config
}
IngestOpts contains options for document ingestion.
type IngestResult ¶
type IngestResult struct {
DocumentID string `json:"document_id"`
ChunksCreated int `json:"chunks_created"`
CollectionID string `json:"collection_id"`
}
IngestResult contains the result of document ingestion.
type SearchMode ¶
type SearchMode string
SearchMode defines the search strategy.
const (
// SearchModeVector uses pure vector similarity search (default).
SearchModeVector SearchMode = "vector"
// SearchModeHybrid combines vector and text search with RRF.
SearchModeHybrid SearchMode = "hybrid"
// SearchModeText uses pure full-text search (BM25).
SearchModeText SearchMode = "text"
)
type SearchOpts ¶
type SearchOpts struct {
CollectionID *string // Optional: limit to specific collection
TopK int // Number of results (0 = default 10)
MinScore float64 // Minimum similarity score (0-1)
Filters map[string]string // Metadata filters
ContextWindow int // Chunks before/after to include (0 = none)
SearchMode SearchMode // Search mode: "vector" (default), "hybrid", or "text"
Alpha float64 // Hybrid search weight: 0=pure text, 1=pure vector, 0.5=equal (default: 0.5)
}
SearchOpts contains options for knowledge search.
type SearchResult ¶
type SearchResult struct {
Results []*types.ChunkResult `json:"results"`
Query string `json:"query"`
TotalFound int `json:"total_found"`
}
SearchResult contains search results with optional context.
type SemanticChunker ¶
type SemanticChunker struct {
// contains filtered or unexported fields
}
SemanticChunker uses embedding similarity to find natural topic boundaries.
func NewSemanticChunker ¶
func NewSemanticChunker(emb embedding.Provider, opts ...SemanticChunkerOption) *SemanticChunker
NewSemanticChunker creates a semantic chunker with the given embedding provider.
func (*SemanticChunker) Chunk ¶
func (c *SemanticChunker) Chunk(ctx context.Context, content string, cfg types.ChunkConfig) ([]ChunkOutput, error)
Chunk splits content into semantically coherent chunks. It embeds sliding windows of sentences and identifies breakpoints where the cosine similarity between adjacent windows drops below the threshold.
type SemanticChunkerOption ¶
type SemanticChunkerOption func(*SemanticChunker)
SemanticChunkerOption configures the semantic chunker.
func WithSimilarityThreshold ¶
func WithSimilarityThreshold(threshold float64) SemanticChunkerOption
WithSimilarityThreshold sets the similarity threshold for breakpoint detection.
func WithWindowSize ¶
func WithWindowSize(size int) SemanticChunkerOption
WithWindowSize sets the number of sentences per embedding window.