knowledge

package
v0.2.0 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 8, 2026 License: MIT Imports: 13 Imported by: 0

Documentation

Index

Constants

View Source
// DefaultSimilarityThreshold is the cosine similarity threshold for detecting
// breakpoints: when similarity between adjacent windows drops below this
// value, a new chunk boundary is created.
const DefaultSimilarityThreshold = 0.5

DefaultSimilarityThreshold is the cosine similarity threshold for detecting breakpoints. When similarity between adjacent windows drops below this, a new chunk boundary is created.

View Source
// DefaultWindowSize is the number of sentences to include in each embedding
// window.
const DefaultWindowSize = 3

DefaultWindowSize is the number of sentences to include in each embedding window.

Variables

View Source
// Common errors returned by the knowledge engine.
//
// Each value is a distinct sentinel created with errors.New, so callers can
// test for it with errors.Is:
//   - ErrEmptyContent: "document content cannot be empty"
//   - ErrCollectionNotFound: "collection not found"
//   - ErrDocumentNotFound: "document not found"
//   - ErrCollectionExists: "collection already exists"
//   - ErrEmbeddingRequired: "embedding provider required for search"
//   - ErrInvalidChunkConfig: "invalid chunk configuration"
//
// NOTE(review): which operations return which error is not documented
// here — confirm against the implementation before relying on a specific one.
var (
	ErrEmptyContent       = errors.New("document content cannot be empty")
	ErrCollectionNotFound = errors.New("collection not found")
	ErrDocumentNotFound   = errors.New("document not found")
	ErrCollectionExists   = errors.New("collection already exists")
	ErrEmbeddingRequired  = errors.New("embedding provider required for search")
	ErrInvalidChunkConfig = errors.New("invalid chunk configuration")
)

Common errors returned by the knowledge engine.

Functions

This section is empty.

Types

type BulkIngestDocResult

// BulkIngestDocResult contains the result for a single document.
//
// DocumentID, Title, and Error carry the omitempty JSON option, so they are
// left out of the encoded output when empty; Index, ChunksCreated, and
// Success are always encoded.
//
// NOTE(review): Index presumably refers to the document's position in the
// slice passed to BulkIngest — confirm against the implementation.
type BulkIngestDocResult struct {
	Index         int    `json:"index"`
	DocumentID    string `json:"document_id,omitempty"`
	Title         string `json:"title,omitempty"`
	ChunksCreated int    `json:"chunks_created"`
	Success       bool   `json:"success"`
	Error         string `json:"error,omitempty"`
}

BulkIngestDocResult contains the result for a single document.

type BulkIngestDocument

// BulkIngestDocument represents a single document for bulk ingestion.
//
// Content is the only field always present in the JSON encoding; Title,
// Source, ContentType, and Metadata carry omitempty and are dropped when
// empty. Those four fields have the same names and types as the
// corresponding IngestOpts fields.
type BulkIngestDocument struct {
	Content     string            `json:"content"`
	Title       string            `json:"title,omitempty"`
	Source      string            `json:"source,omitempty"`
	ContentType string            `json:"content_type,omitempty"`
	Metadata    map[string]string `json:"metadata,omitempty"`
}

BulkIngestDocument represents a single document for bulk ingestion.

type BulkIngestOpts

// BulkIngestOpts contains options for bulk document ingestion.
// It is passed by pointer to Engine.BulkIngest.
type BulkIngestOpts struct {
	ChunkConfig     *types.ChunkConfig                     // Override collection's default config
	Concurrency     int                                    // Number of concurrent workers (0 = default 4)
	OnProgress      func(completed, total int, doc string) // Optional progress callback
	ContinueOnError bool                                   // Continue processing on individual document errors
}

BulkIngestOpts contains options for bulk document ingestion.

type BulkIngestResult

// BulkIngestResult contains the overall result of bulk ingestion.
//
// Documents holds the per-document outcomes as BulkIngestDocResult pointers.
// No field uses omitempty, so every field appears in the JSON encoding.
//
// NOTE(review): presumably Succeeded + Failed == TotalDocuments and
// TotalChunks sums ChunksCreated over Documents — confirm against the
// implementation.
type BulkIngestResult struct {
	CollectionID   string                 `json:"collection_id"`
	TotalDocuments int                    `json:"total_documents"`
	Succeeded      int                    `json:"succeeded"`
	Failed         int                    `json:"failed"`
	TotalChunks    int                    `json:"total_chunks"`
	Documents      []*BulkIngestDocResult `json:"documents"`
}

BulkIngestResult contains the overall result of bulk ingestion.

type ChunkOutput

// ChunkOutput represents a single chunk produced by the chunker.
//
// It is the element type returned by both Chunker.Chunk and
// SemanticChunker.Chunk.
//
// NOTE(review): whether Index is zero-based and how TokenCount is computed
// are not shown here — confirm against the chunker implementation.
type ChunkOutput struct {
	Content    string
	Index      int
	TokenCount int
}

ChunkOutput represents a single chunk produced by the chunker.

type ChunkStrategy

// ChunkStrategy defines the available chunking strategies.
// It is a string type; the declared values are "fixed", "sentence",
// "paragraph", and "semantic".
type ChunkStrategy string

ChunkStrategy defines the available chunking strategies.

// Chunking strategies and the string value that identifies each one.
const (
	ChunkStrategyFixed     ChunkStrategy = "fixed"
	ChunkStrategySentence  ChunkStrategy = "sentence"
	ChunkStrategyParagraph ChunkStrategy = "paragraph"
	ChunkStrategySemantic  ChunkStrategy = "semantic"
)

type Chunker

// Chunker splits text into chunks for indexing.
//
// DefaultChunker's Chunk method has this signature. SemanticChunker's Chunk
// method additionally takes a context.Context and returns an error, so its
// signature differs from this interface.
type Chunker interface {
	// Chunk splits content into chunks based on the configured strategy.
	Chunk(content string, cfg types.ChunkConfig) []ChunkOutput
}

Chunker splits text into chunks for indexing.

type CreateCollectionOpts

// CreateCollectionOpts contains options for creating a collection.
// It is passed by value to Engine.CreateCollection.
type CreateCollectionOpts struct {
	Name        string             // Required: collection name
	Description string             // Optional description
	ChunkConfig *types.ChunkConfig // Chunk configuration (uses default if nil)
}

CreateCollectionOpts contains options for creating a collection.

type DefaultChunker

// DefaultChunker implements all chunking strategies.
// It has no fields; NewChunker returns a pointer to one.
type DefaultChunker struct{}

DefaultChunker implements all chunking strategies.

func NewChunker

func NewChunker() *DefaultChunker

NewChunker creates a new default chunker.

func (*DefaultChunker) Chunk

func (c *DefaultChunker) Chunk(content string, cfg types.ChunkConfig) []ChunkOutput

Chunk splits content using the specified strategy.

type Engine

type Engine struct {
	// contains filtered or unexported fields
}

Engine implements the knowledge store logic layer. It orchestrates chunking, embedding, and storage operations.

func NewEngine

func NewEngine(store storage.Backend, emb embedding.Provider, cfg *config.KnowledgeConfig) (*Engine, error)

NewEngine creates a new knowledge engine.

func (*Engine) BulkIngest

func (e *Engine) BulkIngest(ctx context.Context, namespace, collectionID string, documents []BulkIngestDocument, opts *BulkIngestOpts) (*BulkIngestResult, error)

BulkIngest ingests multiple documents into a collection with progress reporting. Documents are processed concurrently for efficiency.

func (*Engine) CollectionStats

func (e *Engine) CollectionStats(ctx context.Context, namespace, collectionID string) (*types.CollectionStats, error)

CollectionStats returns statistics for a collection.

func (*Engine) CreateCollection

func (e *Engine) CreateCollection(ctx context.Context, namespace string, opts CreateCollectionOpts) (*types.Collection, error)

CreateCollection creates a new collection.

func (*Engine) DeleteCollection

func (e *Engine) DeleteCollection(ctx context.Context, namespace, collectionID string) error

DeleteCollection removes a collection and all its documents.

func (*Engine) DeleteDocument

func (e *Engine) DeleteDocument(ctx context.Context, namespace, docID string) error

DeleteDocument removes a document and all its chunks.

func (*Engine) GetCollection

func (e *Engine) GetCollection(ctx context.Context, namespace, collectionID string) (*types.Collection, error)

GetCollection retrieves a collection by ID.

func (*Engine) GetDocument

func (e *Engine) GetDocument(ctx context.Context, namespace, docID string) (*types.Document, error)

GetDocument retrieves a document by ID.

func (*Engine) Ingest

func (e *Engine) Ingest(ctx context.Context, namespace, collectionID, content string, opts *IngestOpts) (*IngestResult, error)

Ingest adds a document to a collection, chunking and generating embeddings.

func (*Engine) ListCollections

func (e *Engine) ListCollections(ctx context.Context, namespace, cursor string, limit int) ([]*types.Collection, string, error)

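ListCollections returns collections in a namespace. It takes a cursor and a limit and returns a string alongside the results (presumably the cursor for the next page), so a single call may not return every collection.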

func (*Engine) Search

func (e *Engine) Search(ctx context.Context, namespace, query string, opts *SearchOpts) (*SearchResult, error)

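Search searches the knowledge stored in a namespace. SearchOpts.SearchMode selects pure vector similarity search (the default), hybrid vector-plus-text search, or pure full-text search.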

func (*Engine) SetExtractionEnqueuer

func (e *Engine) SetExtractionEnqueuer(eq ExtractionEnqueuer)

SetExtractionEnqueuer sets the extraction enqueuer for entity extraction. When set, ingested chunks will be queued for background entity extraction.

type ExtractionEnqueuer

// ExtractionEnqueuer queues content for entity extraction.
//
// An implementation is installed on an Engine with SetExtractionEnqueuer;
// when one is set, ingested chunks are queued for background entity
// extraction.
type ExtractionEnqueuer interface {
	EnqueueForExtraction(ctx context.Context, namespace, sourceType, sourceID, content string) error
}

ExtractionEnqueuer queues content for entity extraction.

type IngestOpts

// IngestOpts contains options for document ingestion.
// It is passed by pointer to Engine.Ingest.
type IngestOpts struct {
	Title       string             // Optional document title
	Source      string             // Source URL or identifier
	ContentType string             // "text", "markdown", "html"
	Metadata    map[string]string  // Optional metadata
	ChunkConfig *types.ChunkConfig // Override collection's default config
}

IngestOpts contains options for document ingestion.

type IngestResult

// IngestResult contains the result of document ingestion.
// It is returned by Engine.Ingest. No field uses omitempty, so all three
// fields always appear in the JSON encoding.
type IngestResult struct {
	DocumentID    string `json:"document_id"`
	ChunksCreated int    `json:"chunks_created"`
	CollectionID  string `json:"collection_id"`
}

IngestResult contains the result of document ingestion.

type SearchMode

// SearchMode defines the search strategy.
// It is a string type; the declared values are "vector", "hybrid", and
// "text".
type SearchMode string

SearchMode defines the search strategy.

// Search modes accepted in SearchOpts.SearchMode.
const (
	// SearchModeVector uses pure vector similarity search (default).
	SearchModeVector SearchMode = "vector"
	// SearchModeHybrid combines vector and text search with RRF.
	SearchModeHybrid SearchMode = "hybrid"
	// SearchModeText uses pure full-text search (BM25).
	SearchModeText SearchMode = "text"
)

type SearchOpts

// SearchOpts contains options for knowledge search.
// It is passed by pointer to Engine.Search.
type SearchOpts struct {
	CollectionID  *string           // Optional: limit to specific collection
	TopK          int               // Number of results (0 = default 10)
	MinScore      float64           // Minimum similarity score (0-1)
	Filters       map[string]string // Metadata filters
	ContextWindow int               // Chunks before/after to include (0 = none)
	SearchMode    SearchMode        // Search mode: "vector" (default), "hybrid", or "text"
	Alpha         float64           // Hybrid search weight: 0=pure text, 1=pure vector, 0.5=equal (default: 0.5)
}

SearchOpts contains options for knowledge search.

type SearchResult

// SearchResult contains search results with optional context.
// It is returned by Engine.Search. No field uses omitempty, so all three
// fields always appear in the JSON encoding.
//
// NOTE(review): whether TotalFound can differ from len(Results) is not
// shown here — confirm against the implementation.
type SearchResult struct {
	Results    []*types.ChunkResult `json:"results"`
	Query      string               `json:"query"`
	TotalFound int                  `json:"total_found"`
}

SearchResult contains search results with optional context.

type SemanticChunker

type SemanticChunker struct {
	// contains filtered or unexported fields
}

SemanticChunker uses embedding similarity to find natural topic boundaries.

func NewSemanticChunker

func NewSemanticChunker(emb embedding.Provider, opts ...SemanticChunkerOption) *SemanticChunker

NewSemanticChunker creates a semantic chunker with the given embedding provider.

func (*SemanticChunker) Chunk

func (c *SemanticChunker) Chunk(ctx context.Context, content string, cfg types.ChunkConfig) ([]ChunkOutput, error)

Chunk splits content into semantically coherent chunks. It embeds sliding windows of sentences and identifies breakpoints where the cosine similarity between adjacent windows drops below the threshold.

type SemanticChunkerOption

// SemanticChunkerOption configures the semantic chunker.
// Options are passed to NewSemanticChunker; WithSimilarityThreshold and
// WithWindowSize each return one.
type SemanticChunkerOption func(*SemanticChunker)

SemanticChunkerOption configures the semantic chunker.

func WithSimilarityThreshold

func WithSimilarityThreshold(threshold float64) SemanticChunkerOption

WithSimilarityThreshold sets the similarity threshold for breakpoint detection.

func WithWindowSize

func WithWindowSize(size int) SemanticChunkerOption

WithWindowSize sets the number of sentences per embedding window.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL