knowledge

package

v0.2.0 (latest) · Published: Mar 8, 2026 · License: MIT · Imports: 13 · Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/petal-labs/cortex

Links

Open Source Insights

Documentation ¶

Index ¶

Constants
Variables
type BulkIngestDocResult
type BulkIngestDocument
type BulkIngestOpts
type BulkIngestResult
type ChunkOutput
type ChunkStrategy
type Chunker
type CreateCollectionOpts
type DefaultChunker
- func NewChunker() *DefaultChunker
- func (c *DefaultChunker) Chunk(content string, cfg types.ChunkConfig) []ChunkOutput
type Engine
- func NewEngine(store storage.Backend, emb embedding.Provider, cfg *config.KnowledgeConfig) (*Engine, error)
- func (e *Engine) BulkIngest(ctx context.Context, namespace, collectionID string, ...) (*BulkIngestResult, error)
- func (e *Engine) CollectionStats(ctx context.Context, namespace, collectionID string) (*types.CollectionStats, error)
- func (e *Engine) CreateCollection(ctx context.Context, namespace string, opts CreateCollectionOpts) (*types.Collection, error)
- func (e *Engine) DeleteCollection(ctx context.Context, namespace, collectionID string) error
- func (e *Engine) DeleteDocument(ctx context.Context, namespace, docID string) error
- func (e *Engine) GetCollection(ctx context.Context, namespace, collectionID string) (*types.Collection, error)
- func (e *Engine) GetDocument(ctx context.Context, namespace, docID string) (*types.Document, error)
- func (e *Engine) Ingest(ctx context.Context, namespace, collectionID, content string, opts *IngestOpts) (*IngestResult, error)
- func (e *Engine) ListCollections(ctx context.Context, namespace, cursor string, limit int) ([]*types.Collection, string, error)
- func (e *Engine) Search(ctx context.Context, namespace, query string, opts *SearchOpts) (*SearchResult, error)
- func (e *Engine) SetExtractionEnqueuer(eq ExtractionEnqueuer)
type ExtractionEnqueuer
type IngestOpts
type IngestResult
type SearchMode
type SearchOpts
type SearchResult
type SemanticChunker
- func NewSemanticChunker(emb embedding.Provider, opts ...SemanticChunkerOption) *SemanticChunker
- func (c *SemanticChunker) Chunk(ctx context.Context, content string, cfg types.ChunkConfig) ([]ChunkOutput, error)
type SemanticChunkerOption
- func WithSimilarityThreshold(threshold float64) SemanticChunkerOption
- func WithWindowSize(size int) SemanticChunkerOption

Constants ¶

View Source

const DefaultSimilarityThreshold = 0.5

DefaultSimilarityThreshold is the default cosine similarity threshold for detecting breakpoints. When the similarity between adjacent windows drops below this value, a new chunk boundary is created.

View Source

const DefaultWindowSize = 3

DefaultWindowSize is the default number of sentences to include in each embedding window.

Variables ¶

View Source

var (
	ErrEmptyContent       = errors.New("document content cannot be empty")
	ErrCollectionNotFound = errors.New("collection not found")
	ErrDocumentNotFound   = errors.New("document not found")
	ErrCollectionExists   = errors.New("collection already exists")
	ErrEmbeddingRequired  = errors.New("embedding provider required for search")
	ErrInvalidChunkConfig = errors.New("invalid chunk configuration")
)

Common errors returned by the knowledge engine.

Functions ¶

This section is empty.

Types ¶

type BulkIngestDocResult ¶

type BulkIngestDocResult struct {
	Index         int    `json:"index"`
	DocumentID    string `json:"document_id,omitempty"`
	Title         string `json:"title,omitempty"`
	ChunksCreated int    `json:"chunks_created"`
	Success       bool   `json:"success"`
	Error         string `json:"error,omitempty"`
}

BulkIngestDocResult contains the result for a single document.

type BulkIngestDocument ¶

type BulkIngestDocument struct {
	Content     string            `json:"content"`
	Title       string            `json:"title,omitempty"`
	Source      string            `json:"source,omitempty"`
	ContentType string            `json:"content_type,omitempty"`
	Metadata    map[string]string `json:"metadata,omitempty"`
}

BulkIngestDocument represents a single document for bulk ingestion.

type BulkIngestOpts ¶

type BulkIngestOpts struct {
	ChunkConfig     *types.ChunkConfig                     // Override collection's default config
	Concurrency     int                                    // Number of concurrent workers (0 = default 4)
	OnProgress      func(completed, total int, doc string) // Optional progress callback
	ContinueOnError bool                                   // Continue processing on individual document errors
}

BulkIngestOpts contains options for bulk document ingestion.

type BulkIngestResult ¶

type BulkIngestResult struct {
	CollectionID   string                 `json:"collection_id"`
	TotalDocuments int                    `json:"total_documents"`
	Succeeded      int                    `json:"succeeded"`
	Failed         int                    `json:"failed"`
	TotalChunks    int                    `json:"total_chunks"`
	Documents      []*BulkIngestDocResult `json:"documents"`
}

BulkIngestResult contains the overall result of bulk ingestion.

type ChunkOutput ¶

type ChunkOutput struct {
	Content    string
	Index      int
	TokenCount int
}

ChunkOutput represents a single chunk produced by the chunker.

type ChunkStrategy ¶

type ChunkStrategy string

ChunkStrategy defines the available chunking strategies.

const (
	ChunkStrategyFixed     ChunkStrategy = "fixed"
	ChunkStrategySentence  ChunkStrategy = "sentence"
	ChunkStrategyParagraph ChunkStrategy = "paragraph"
	ChunkStrategySemantic  ChunkStrategy = "semantic"
)

type Chunker ¶

type Chunker interface {
	// Chunk splits content into chunks based on the configured strategy.
	Chunk(content string, cfg types.ChunkConfig) []ChunkOutput
}

Chunker splits text into chunks for indexing.

type CreateCollectionOpts ¶

type CreateCollectionOpts struct {
	Name        string             // Required: collection name
	Description string             // Optional description
	ChunkConfig *types.ChunkConfig // Chunk configuration (uses default if nil)
}

CreateCollectionOpts contains options for creating a collection.

type DefaultChunker ¶

type DefaultChunker struct{}

DefaultChunker implements all chunking strategies.

func NewChunker ¶

func NewChunker() *DefaultChunker

NewChunker creates a new default chunker.

func (*DefaultChunker) Chunk ¶

func (c *DefaultChunker) Chunk(content string, cfg types.ChunkConfig) []ChunkOutput

Chunk splits content using the specified strategy.

type Engine ¶

type Engine struct {
	// contains filtered or unexported fields
}

Engine implements the knowledge store logic layer. It orchestrates chunking, embedding, and storage operations.

func NewEngine ¶

func NewEngine(store storage.Backend, emb embedding.Provider, cfg *config.KnowledgeConfig) (*Engine, error)

NewEngine creates a new knowledge engine.

func (*Engine) BulkIngest ¶

func (e *Engine) BulkIngest(ctx context.Context, namespace, collectionID string, documents []BulkIngestDocument, opts *BulkIngestOpts) (*BulkIngestResult, error)

BulkIngest ingests multiple documents into a collection, with optional progress reporting through the OnProgress callback in opts. Documents are processed concurrently for efficiency.

func (*Engine) CollectionStats ¶

func (e *Engine) CollectionStats(ctx context.Context, namespace, collectionID string) (*types.CollectionStats, error)

CollectionStats returns statistics for a collection.

func (*Engine) CreateCollection ¶

func (e *Engine) CreateCollection(ctx context.Context, namespace string, opts CreateCollectionOpts) (*types.Collection, error)

CreateCollection creates a new collection.

func (*Engine) DeleteCollection ¶

func (e *Engine) DeleteCollection(ctx context.Context, namespace, collectionID string) error

DeleteCollection removes a collection and all its documents.

func (*Engine) DeleteDocument ¶

func (e *Engine) DeleteDocument(ctx context.Context, namespace, docID string) error

DeleteDocument removes a document and all its chunks.

func (*Engine) GetCollection ¶

func (e *Engine) GetCollection(ctx context.Context, namespace, collectionID string) (*types.Collection, error)

GetCollection retrieves a collection by ID.

func (*Engine) GetDocument ¶

func (e *Engine) GetDocument(ctx context.Context, namespace, docID string) (*types.Document, error)

GetDocument retrieves a document by ID.

func (*Engine) Ingest ¶

func (e *Engine) Ingest(ctx context.Context, namespace, collectionID, content string, opts *IngestOpts) (*IngestResult, error)

Ingest adds a document to a collection, chunking and generating embeddings.

func (*Engine) ListCollections ¶

func (e *Engine) ListCollections(ctx context.Context, namespace, cursor string, limit int) ([]*types.Collection, string, error)

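ListCollections returns a page of collections in a namespace. Use cursor and limit to page through the results; the returned string is presumably the cursor for the next page.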

func (*Engine) Search ¶

func (e *Engine) Search(ctx context.Context, namespace, query string, opts *SearchOpts) (*SearchResult, error)

Search searches knowledge in a namespace. The strategy is selected by opts.SearchMode: vector similarity (the default), full-text, or a hybrid of the two.

func (*Engine) SetExtractionEnqueuer ¶

func (e *Engine) SetExtractionEnqueuer(eq ExtractionEnqueuer)

SetExtractionEnqueuer sets the extraction enqueuer for entity extraction. When set, ingested chunks will be queued for background entity extraction.

type ExtractionEnqueuer ¶

type ExtractionEnqueuer interface {
	EnqueueForExtraction(ctx context.Context, namespace, sourceType, sourceID, content string) error
}

ExtractionEnqueuer queues content for entity extraction.

type IngestOpts ¶

type IngestOpts struct {
	Title       string             // Optional document title
	Source      string             // Source URL or identifier
	ContentType string             // "text", "markdown", "html"
	Metadata    map[string]string  // Optional metadata
	ChunkConfig *types.ChunkConfig // Override collection's default config
}

IngestOpts contains options for document ingestion.

type IngestResult ¶

type IngestResult struct {
	DocumentID    string `json:"document_id"`
	ChunksCreated int    `json:"chunks_created"`
	CollectionID  string `json:"collection_id"`
}

IngestResult contains the result of document ingestion.

type SearchMode ¶

type SearchMode string

SearchMode defines the search strategy.

const (
	// SearchModeVector uses pure vector similarity search (default).
	SearchModeVector SearchMode = "vector"
	// SearchModeHybrid combines vector and text search with RRF.
	SearchModeHybrid SearchMode = "hybrid"
	// SearchModeText uses pure full-text search (BM25).
	SearchModeText SearchMode = "text"
)

type SearchOpts ¶

type SearchOpts struct {
	CollectionID  *string           // Optional: limit to specific collection
	TopK          int               // Number of results (0 = default 10)
	MinScore      float64           // Minimum similarity score (0-1)
	Filters       map[string]string // Metadata filters
	ContextWindow int               // Chunks before/after to include (0 = none)
	SearchMode    SearchMode        // Search mode: "vector" (default), "hybrid", or "text"
	Alpha         float64           // Hybrid search weight: 0=pure text, 1=pure vector, 0.5=equal (default: 0.5)
}

SearchOpts contains options for knowledge search.

type SearchResult ¶

type SearchResult struct {
	Results    []*types.ChunkResult `json:"results"`
	Query      string               `json:"query"`
	TotalFound int                  `json:"total_found"`
}

SearchResult contains search results with optional context.

type SemanticChunker ¶

type SemanticChunker struct {
	// contains filtered or unexported fields
}

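SemanticChunker uses embedding similarity to find natural topic boundaries. Its Chunk method takes a context and returns an error, so it does not satisfy the Chunker interface.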

func NewSemanticChunker ¶

func NewSemanticChunker(emb embedding.Provider, opts ...SemanticChunkerOption) *SemanticChunker

NewSemanticChunker creates a semantic chunker with the given embedding provider.

func (*SemanticChunker) Chunk ¶

func (c *SemanticChunker) Chunk(ctx context.Context, content string, cfg types.ChunkConfig) ([]ChunkOutput, error)

Chunk splits content into semantically coherent chunks. It embeds sliding windows of sentences and identifies breakpoints where the cosine similarity between adjacent windows drops below the threshold.

type SemanticChunkerOption ¶

type SemanticChunkerOption func(*SemanticChunker)

SemanticChunkerOption configures the semantic chunker.

func WithSimilarityThreshold ¶

func WithSimilarityThreshold(threshold float64) SemanticChunkerOption

WithSimilarityThreshold sets the similarity threshold for breakpoint detection.

func WithWindowSize ¶

func WithWindowSize(size int) SemanticChunkerOption

WithWindowSize sets the number of sentences per embedding window.

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL