Documentation
¶
Index ¶
- type AtomicWriter
- type Chunk
- type ChunkFile
- type ChunkFileMetadata
- type ChunkType
- type Chunker
- type CodeExtraction
- type Config
- type DocumentationChunk
- type FileDiscovery
- type Formatter
- type GeneratorMetadata
- type Indexer
- type IndexerWatcher
- type NoOpProgressReporter
- func (n *NoOpProgressReporter) OnComplete(stats *ProcessingStats)
- func (n *NoOpProgressReporter) OnDiscoveryComplete(codeFiles, docFiles int)
- func (n *NoOpProgressReporter) OnDiscoveryStart()
- func (n *NoOpProgressReporter) OnEmbeddingProgress(processedChunks int)
- func (n *NoOpProgressReporter) OnEmbeddingStart(totalChunks int)
- func (n *NoOpProgressReporter) OnFileProcessed(fileName string)
- func (n *NoOpProgressReporter) OnFileProcessingStart(totalFiles int)
- func (n *NoOpProgressReporter) OnWritingChunks()
- type Parser
- type ProcessingStats
- type ProgressReporter
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type AtomicWriter ¶
// AtomicWriter handles atomic file writing using a temp → rename pattern.
// Its state is opaque to callers: create one with NewAtomicWriter and use
// the ReadChunkFile/WriteChunkFile and ReadMetadata/WriteMetadata methods.
type AtomicWriter struct {
// contains filtered or unexported fields
}
AtomicWriter handles atomic file writing using a temp → rename pattern.
func GetWriter ¶
func GetWriter(idx Indexer) *AtomicWriter
GetWriter returns the internal writer from an Indexer for testing purposes. This should only be used in tests.
func NewAtomicWriter ¶
func NewAtomicWriter(outputDir string) (*AtomicWriter, error)
NewAtomicWriter creates a new atomic writer.
func (*AtomicWriter) ReadChunkFile ¶
func (w *AtomicWriter) ReadChunkFile(filename string) (*ChunkFile, error)
ReadChunkFile reads an existing chunk file.
func (*AtomicWriter) ReadMetadata ¶
func (w *AtomicWriter) ReadMetadata() (*GeneratorMetadata, error)
ReadMetadata reads existing generator metadata.
func (*AtomicWriter) WriteChunkFile ¶
func (w *AtomicWriter) WriteChunkFile(filename string, chunkFile *ChunkFile) error
WriteChunkFile writes a chunk file atomically.
func (*AtomicWriter) WriteMetadata ¶
func (w *AtomicWriter) WriteMetadata(metadata *GeneratorMetadata) error
WriteMetadata writes generator metadata atomically.
type Chunk ¶
// Chunk represents a piece of indexed content with its embedding.
// The struct tags give the JSON key each field is stored under. No tag uses
// omitempty, so with encoding/json's default marshaling a nil Embedding,
// Tags, or Metadata is written as JSON null rather than omitted.
type Chunk struct {
ID string `json:"id"`
ChunkType ChunkType `json:"chunk_type"`
Title string `json:"title"`
Text string `json:"text"`
// Embedding is the chunk's embedding vector.
// NOTE(review): presumably its length matches ChunkFileMetadata.Dimensions — confirm.
Embedding []float32 `json:"embedding"`
Tags []string `json:"tags"`
// Metadata maps string keys to values of any type; its schema is not
// declared here.
Metadata map[string]interface{} `json:"metadata"`
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
}
Chunk represents a piece of indexed content with its embedding.
type ChunkFile ¶
// ChunkFile represents the JSON structure for storing chunks: a "_metadata"
// object describing the file plus a "chunks" array of Chunk values.
// It is the type read and written by AtomicWriter.ReadChunkFile and
// AtomicWriter.WriteChunkFile.
type ChunkFile struct {
Metadata ChunkFileMetadata `json:"_metadata"`
Chunks []Chunk `json:"chunks"`
}
ChunkFile represents the JSON structure for storing chunks.
type ChunkFileMetadata ¶
// ChunkFileMetadata contains metadata about the chunk file.
// It is serialized under the "_metadata" key of ChunkFile.
type ChunkFileMetadata struct {
// NOTE(review): Model and Dimensions presumably name the embedding model and
// the vector length of the chunks in this file — confirm.
Model string `json:"model"`
Dimensions int `json:"dimensions"`
ChunkType ChunkType `json:"chunk_type"`
// Generated is a timestamp.
// NOTE(review): presumably the time the file was produced — confirm.
Generated time.Time `json:"generated"`
Version string `json:"version"`
}
ChunkFileMetadata contains metadata about the chunk file.
type Chunker ¶
// Chunker splits documentation files into semantic chunks.
// The caller supplies the document text in content; filePath accompanies it
// (NOTE(review): presumably recorded as DocumentationChunk.FilePath — confirm).
type Chunker interface {
// ChunkDocument splits a markdown file into semantic chunks.
// Returns a slice of DocumentationChunk.
ChunkDocument(ctx context.Context, filePath string, content string) ([]DocumentationChunk, error)
}
Chunker splits documentation files into semantic chunks.
func NewChunker ¶
NewChunker creates a new documentation chunker.
type CodeExtraction ¶
// CodeExtraction represents the three-tier extraction from a source code file.
// The tiers are Symbols, Definitions, and Data; each is a pointer into the
// extraction package's types. It is the result type of Parser.ParseFile.
type CodeExtraction struct {
// Symbols contains high-level overview (package, imports count, type/function names)
Symbols *extraction.SymbolsData
// Definitions contains full type definitions and function signatures
Definitions *extraction.DefinitionsData
// Data contains constants, global variables, and configuration
Data *extraction.DataData
// Metadata about the extraction
Language string
FilePath string
// NOTE(review): StartLine/EndLine give a line range for the extraction;
// whether they are 0- or 1-based and inclusive is not stated here — confirm.
StartLine int
EndLine int
}
CodeExtraction represents the three-tier extraction from a source code file.
type Config ¶
// Config contains configuration for the indexer.
// A *Config is what NewWithProgress accepts; DefaultConfig provides one with
// defaults filled in.
type Config struct {
// Root directory of the codebase to index
RootDir string
// Paths configuration. These mirror the three pattern lists taken by
// NewFileDiscovery (code, docs, ignore).
CodePatterns []string
DocsPatterns []string
IgnorePatterns []string
// Chunking configuration. Note the units differ between fields:
// documentation sizes are in tokens, the code size is in characters.
ChunkStrategies []string // ["symbols", "definitions", "data"]
DocChunkSize int // tokens
CodeChunkSize int // characters
Overlap int // tokens
// Output configuration
// NOTE(review): the path below reads like the default location — confirm.
OutputDir string // .cortex/chunks/
// Embedding configuration
// NOTE(review): EmbeddingDims presumably corresponds to
// ChunkFileMetadata.Dimensions; which of EmbeddingEndpoint/EmbeddingBinary
// applies presumably depends on EmbeddingProvider — confirm both.
EmbeddingProvider string
EmbeddingModel string
EmbeddingDims int
EmbeddingEndpoint string
EmbeddingBinary string
}
Config contains configuration for the indexer.
func DefaultConfig ¶
DefaultConfig returns a configuration with sensible defaults.
type DocumentationChunk ¶
// DocumentationChunk represents a chunk of documentation content.
// It is the element type returned by Chunker.ChunkDocument and the input to
// Formatter.FormatDocumentation. The struct declares no json tags.
type DocumentationChunk struct {
FilePath string
// NOTE(review): SectionIndex/ChunkIndex presumably position the chunk within
// its file and section; the index base is not stated here — confirm.
SectionIndex int
ChunkIndex int
Text string
// NOTE(review): StartLine/EndLine presumably give the line range of Text in
// the source file; base and inclusiveness are not stated here — confirm.
StartLine int
EndLine int
// NOTE(review): paragraph-related flags; what qualifies as "large" or
// "split" is not visible here — confirm against the chunker.
IsLargeParagraph bool
IsSplitParagraph bool
}
DocumentationChunk represents a chunk of documentation content.
type FileDiscovery ¶
// FileDiscovery handles file discovery with glob patterns and ignore rules.
// Its state is opaque to callers: create one with NewFileDiscovery and call
// DiscoverFiles to obtain the code and doc file lists.
type FileDiscovery struct {
// contains filtered or unexported fields
}
FileDiscovery handles file discovery with glob patterns and ignore rules.
func NewFileDiscovery ¶
func NewFileDiscovery(rootDir string, codePatterns, docsPatterns, ignorePatterns []string) (*FileDiscovery, error)
NewFileDiscovery creates a new file discovery instance.
func (*FileDiscovery) DiscoverFiles ¶
func (fd *FileDiscovery) DiscoverFiles() (codeFiles []string, docFiles []string, err error)
DiscoverFiles walks the directory tree and returns code and doc files.
type Formatter ¶
// Formatter converts code extractions and doc chunks into natural language text.
// The three code-oriented methods each take one tier of extraction data
// (symbols, definitions, data) plus a language string. Every method returns
// the formatted text as a string; none returns an error.
type Formatter interface {
// FormatSymbols converts SymbolsData into natural language text.
FormatSymbols(data *extraction.SymbolsData, language string) string
// FormatDefinitions converts DefinitionsData into formatted code with line comments.
FormatDefinitions(data *extraction.DefinitionsData, language string) string
// FormatData converts DataData into formatted code with line comments.
FormatData(data *extraction.DataData, language string) string
// FormatDocumentation formats a documentation chunk (may add context).
FormatDocumentation(chunk *DocumentationChunk) string
}
Formatter converts code extractions and doc chunks into natural language text.
type GeneratorMetadata ¶
// GeneratorMetadata tracks file checksums and processing stats for incremental indexing.
// It is the type read and written by AtomicWriter.ReadMetadata and
// AtomicWriter.WriteMetadata.
type GeneratorMetadata struct {
Version string `json:"version"`
GeneratedAt time.Time `json:"generated_at"`
// FileChecksums and FileMtimes are string-keyed maps.
// NOTE(review): the keys are presumably file paths, and the checksum
// algorithm is not shown here — confirm both.
FileChecksums map[string]string `json:"file_checksums"`
FileMtimes map[string]time.Time `json:"file_mtimes"`
// Stats is stored by value under the "stats" key.
Stats ProcessingStats `json:"stats"`
}
GeneratorMetadata tracks file checksums and processing stats for incremental indexing.
type Indexer ¶
// Indexer provides the main interface for indexing codebase content.
// Index and IndexIncremental each perform a run and report ProcessingStats;
// Watch blocks until its context is cancelled; Close releases resources.
// NewWithProgress returns an Indexer.
type Indexer interface {
// Index processes all files in the codebase and generates chunk files.
// Returns statistics about the indexing process.
Index(ctx context.Context) (*ProcessingStats, error)
// IndexIncremental processes only changed files based on checksums.
// Returns statistics about the indexing process.
IndexIncremental(ctx context.Context) (*ProcessingStats, error)
// Watch starts watching for file changes and reindexes incrementally.
// Blocks until context is cancelled.
Watch(ctx context.Context) error
// Close releases all resources held by the indexer.
Close() error
}
Indexer provides the main interface for indexing codebase content.
func NewWithProgress ¶
func NewWithProgress(config *Config, progress ProgressReporter) (Indexer, error)
NewWithProgress creates a new indexer instance with a custom progress reporter.
type IndexerWatcher ¶
// IndexerWatcher watches the root directory for file changes and triggers
// incremental reindexing. Its state is opaque to callers: create one with
// NewIndexerWatcher (which takes the Indexer and the root directory) and
// begin watching with Start.
type IndexerWatcher struct {
// contains filtered or unexported fields
}
IndexerWatcher watches the root directory for file changes and triggers incremental reindexing.
func NewIndexerWatcher ¶
func NewIndexerWatcher(idx Indexer, rootDir string) (*IndexerWatcher, error)
NewIndexerWatcher creates a new file watcher for the indexer.
func (*IndexerWatcher) Start ¶
func (iw *IndexerWatcher) Start(ctx context.Context)
Start begins watching for file changes.
type NoOpProgressReporter ¶
// NoOpProgressReporter is a progress reporter that does nothing.
// It is an empty struct with no state. Its methods are declared on the
// pointer receiver and match the ProgressReporter method set, so pass
// &NoOpProgressReporter{} where a ProgressReporter is required.
type NoOpProgressReporter struct{}
NoOpProgressReporter is a progress reporter that does nothing. It is used when progress reporting is disabled (e.g., with the --quiet flag).
func (*NoOpProgressReporter) OnComplete ¶
func (n *NoOpProgressReporter) OnComplete(stats *ProcessingStats)
func (*NoOpProgressReporter) OnDiscoveryComplete ¶
func (n *NoOpProgressReporter) OnDiscoveryComplete(codeFiles, docFiles int)
func (*NoOpProgressReporter) OnDiscoveryStart ¶
func (n *NoOpProgressReporter) OnDiscoveryStart()
func (*NoOpProgressReporter) OnEmbeddingProgress ¶
func (n *NoOpProgressReporter) OnEmbeddingProgress(processedChunks int)
func (*NoOpProgressReporter) OnEmbeddingStart ¶
func (n *NoOpProgressReporter) OnEmbeddingStart(totalChunks int)
func (*NoOpProgressReporter) OnFileProcessed ¶
func (n *NoOpProgressReporter) OnFileProcessed(fileName string)
func (*NoOpProgressReporter) OnFileProcessingStart ¶
func (n *NoOpProgressReporter) OnFileProcessingStart(totalFiles int)
func (*NoOpProgressReporter) OnWritingChunks ¶
func (n *NoOpProgressReporter) OnWritingChunks()
type Parser ¶
// Parser extracts structured information from source code files.
// ParseFile receives only a path, not the file contents
// (NOTE(review): so implementations presumably read the file themselves — confirm).
type Parser interface {
// ParseFile extracts code structure from a source file.
// Returns CodeExtraction containing symbols, definitions, and data.
ParseFile(ctx context.Context, filePath string) (*CodeExtraction, error)
// SupportsLanguage checks if this parser supports the given language.
SupportsLanguage(language string) bool
}
Parser extracts structured information from source code files.
type ProcessingStats ¶
// ProcessingStats tracks statistics about the indexing process.
// It is returned by Indexer.Index and Indexer.IndexIncremental, passed to
// ProgressReporter.OnComplete, and stored in GeneratorMetadata.Stats.
// Counts are plain ints; the elapsed time is a float64 number of seconds.
type ProcessingStats struct {
DocsProcessed int `json:"docs_processed"`
CodeFilesProcessed int `json:"code_files_processed"`
TotalDocChunks int `json:"total_doc_chunks"`
TotalCodeChunks int `json:"total_code_chunks"`
ProcessingTimeSeconds float64 `json:"processing_time_seconds"`
}
ProcessingStats tracks statistics about the indexing process.
type ProgressReporter ¶
// ProgressReporter provides callbacks for reporting indexing progress.
// Implementations can display progress bars, log messages, or remain silent;
// NoOpProgressReporter is the silent implementation in this package.
// No callback returns a value, so a reporter cannot signal an error or abort
// indexing through this interface.
// NOTE(review): the methods appear to be listed in pipeline order (discovery,
// file processing, embedding, writing, completion) — confirm the actual call
// order against the indexer.
type ProgressReporter interface {
// OnDiscoveryStart is called when file discovery begins.
OnDiscoveryStart()
// OnDiscoveryComplete is called when file discovery finishes.
OnDiscoveryComplete(codeFiles, docFiles int)
// OnFileProcessingStart is called before processing files.
OnFileProcessingStart(totalFiles int)
// OnFileProcessed is called after each file is processed.
OnFileProcessed(fileName string)
// OnEmbeddingStart is called before generating embeddings.
OnEmbeddingStart(totalChunks int)
// OnEmbeddingProgress is called after each batch of embeddings.
OnEmbeddingProgress(processedChunks int)
// OnWritingChunks is called when writing chunk files begins.
OnWritingChunks()
// OnComplete is called when indexing completes successfully.
OnComplete(stats *ProcessingStats)
}
ProgressReporter provides callbacks for reporting indexing progress. Implementations can display progress bars, log messages, or remain silent.