indexer

package

v1.2.0 (latest) · Published: Oct 27, 2025 · License: Apache-2.0 · Imports: 20 · Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/mvp-joe/project-cortex

Links

Open Source Insights

Documentation ¶

Index ¶

type AtomicWriter
- func GetWriter(idx Indexer) *AtomicWriter
- func NewAtomicWriter(outputDir string) (*AtomicWriter, error)
- func (w *AtomicWriter) ReadChunkFile(filename string) (*ChunkFile, error)
- func (w *AtomicWriter) ReadMetadata() (*GeneratorMetadata, error)
- func (w *AtomicWriter) WriteChunkFile(filename string, chunkFile *ChunkFile) error
- func (w *AtomicWriter) WriteMetadata(metadata *GeneratorMetadata) error
type Chunk
type ChunkFile
type ChunkFileMetadata
type ChunkType
type Chunker
- func NewChunker(targetSize, overlap int) Chunker
type CodeExtraction
type Config
- func DefaultConfig(rootDir string) *Config
type DocumentationChunk
type FileDiscovery
- func NewFileDiscovery(rootDir string, codePatterns, docsPatterns, ignorePatterns []string) (*FileDiscovery, error)
- func (fd *FileDiscovery) DiscoverFiles() (codeFiles []string, docFiles []string, err error)
type Formatter
- func NewFormatter() Formatter
type GeneratorMetadata
type Indexer
- func New(config *Config) (Indexer, error)
- func NewWithProgress(config *Config, progress ProgressReporter) (Indexer, error)
type IndexerWatcher
- func NewIndexerWatcher(idx Indexer, rootDir string) (*IndexerWatcher, error)
- func (iw *IndexerWatcher) Start(ctx context.Context)
- func (iw *IndexerWatcher) Stop()
type NoOpProgressReporter
- func (n *NoOpProgressReporter) OnComplete(stats *ProcessingStats)
- func (n *NoOpProgressReporter) OnDiscoveryComplete(codeFiles, docFiles int)
- func (n *NoOpProgressReporter) OnDiscoveryStart()
- func (n *NoOpProgressReporter) OnEmbeddingProgress(processedChunks int)
- func (n *NoOpProgressReporter) OnEmbeddingStart(totalChunks int)
- func (n *NoOpProgressReporter) OnFileProcessed(fileName string)
- func (n *NoOpProgressReporter) OnFileProcessingStart(totalFiles int)
- func (n *NoOpProgressReporter) OnWritingChunks()
type Parser
- func NewParser() Parser
type ProcessingStats
type ProgressReporter

Constants ¶

This section is empty.

Variables ¶

This section is empty.

Functions ¶

This section is empty.

Types ¶

type AtomicWriter ¶

// AtomicWriter handles atomic file writing using a temp → rename pattern.
// All of its fields are unexported; the documented API returns instances
// from NewAtomicWriter and GetWriter.
type AtomicWriter struct {
	// contains filtered or unexported fields
}

AtomicWriter handles atomic file writing using a write-to-temp-file-then-rename pattern.

func GetWriter ¶

func GetWriter(idx Indexer) *AtomicWriter

GetWriter returns the internal writer from an Indexer for testing purposes. This should only be used in tests.

func NewAtomicWriter ¶

func NewAtomicWriter(outputDir string) (*AtomicWriter, error)

NewAtomicWriter creates a new atomic writer.

func (*AtomicWriter) ReadChunkFile ¶

func (w *AtomicWriter) ReadChunkFile(filename string) (*ChunkFile, error)

ReadChunkFile reads an existing chunk file.

func (*AtomicWriter) ReadMetadata ¶

func (w *AtomicWriter) ReadMetadata() (*GeneratorMetadata, error)

ReadMetadata reads existing generator metadata.

func (*AtomicWriter) WriteChunkFile ¶

func (w *AtomicWriter) WriteChunkFile(filename string, chunkFile *ChunkFile) error

WriteChunkFile writes a chunk file atomically.

func (*AtomicWriter) WriteMetadata ¶

func (w *AtomicWriter) WriteMetadata(metadata *GeneratorMetadata) error

WriteMetadata writes generator metadata atomically.

type Chunk ¶

// Chunk represents a piece of indexed content with its embedding.
// Every field is serialized to JSON under the snake_case key in its tag.
type Chunk struct {
	// ID identifies the chunk (JSON key "id").
	// NOTE(review): uniqueness scope is not visible here — confirm.
	ID        string                 `json:"id"`
	// ChunkType tags the kind of content; see the ChunkType constants.
	ChunkType ChunkType              `json:"chunk_type"`
	// Title and Text carry the chunk's textual content.
	Title     string                 `json:"title"`
	Text      string                 `json:"text"`
	// Embedding is the chunk's embedding vector as float32 values.
	// NOTE(review): length presumably matches ChunkFileMetadata.Dimensions — confirm.
	Embedding []float32              `json:"embedding"`
	// Tags is a list of string labels attached to the chunk.
	Tags      []string               `json:"tags"`
	// Metadata is a free-form map of additional values keyed by string.
	Metadata  map[string]interface{} `json:"metadata"`
	// CreatedAt and UpdatedAt are timestamps.
	// NOTE(review): presumably creation / last-modification times — confirm.
	CreatedAt time.Time              `json:"created_at"`
	UpdatedAt time.Time              `json:"updated_at"`
}

Chunk represents a piece of indexed content with its embedding.

type ChunkFile ¶

// ChunkFile represents the JSON structure for storing chunks: a metadata
// header followed by the list of chunks.
type ChunkFile struct {
	// Metadata is the file-level header. Note the leading underscore in its
	// JSON key ("_metadata").
	Metadata ChunkFileMetadata `json:"_metadata"`
	// Chunks holds the chunk records, serialized under "chunks".
	Chunks   []Chunk           `json:"chunks"`
}

ChunkFile represents the JSON structure for storing chunks.

type ChunkFileMetadata ¶

// ChunkFileMetadata contains metadata about the chunk file. It is embedded
// in ChunkFile under the "_metadata" JSON key.
type ChunkFileMetadata struct {
	// Model and Dimensions are serialized as "model" and "dimensions".
	// NOTE(review): presumably the embedding model name and vector length — confirm.
	Model      string    `json:"model"`
	Dimensions int       `json:"dimensions"`
	// ChunkType tags the kind of content; see the ChunkType constants.
	ChunkType  ChunkType `json:"chunk_type"`
	// Generated is a timestamp serialized as "generated".
	// NOTE(review): presumably when the file was written — confirm.
	Generated  time.Time `json:"generated"`
	// Version is a version string.
	// NOTE(review): unclear whether this is a format or tool version — confirm.
	Version    string    `json:"version"`
}

ChunkFileMetadata contains metadata about the chunk file.

type ChunkType ¶

// ChunkType represents the type of content in a chunk. It is a string type;
// the package declares its values as the ChunkType* constants.
type ChunkType string

ChunkType represents the type of content in a chunk.

// ChunkType values declared by the package. The first three match the
// strategy names listed on Config.ChunkStrategies ("symbols", "definitions",
// "data"); the fourth labels documentation content.
const (
	ChunkTypeSymbols       ChunkType = "symbols"
	ChunkTypeDefinitions   ChunkType = "definitions"
	ChunkTypeData          ChunkType = "data"
	ChunkTypeDocumentation ChunkType = "documentation"
)

type Chunker ¶

// Chunker splits documentation files into semantic chunks.
// NewChunker returns an implementation of this interface.
type Chunker interface {
	// ChunkDocument splits a markdown file into semantic chunks.
	// It takes the file's path and its content as a string.
	// Returns a slice of DocumentationChunk.
	ChunkDocument(ctx context.Context, filePath string, content string) ([]DocumentationChunk, error)
}

Chunker splits documentation files into semantic chunks.

func NewChunker ¶

func NewChunker(targetSize, overlap int) Chunker

NewChunker creates a new documentation chunker.

type CodeExtraction ¶

// CodeExtraction represents the three-tier extraction (symbols, definitions,
// data) from a source code file. Each tier is a pointer into the extraction
// package's types.
// NOTE(review): whether a tier may be nil is not visible here — confirm.
type CodeExtraction struct {
	// Symbols contains high-level overview (package, imports count, type/function names)
	Symbols *extraction.SymbolsData

	// Definitions contains full type definitions and function signatures
	Definitions *extraction.DefinitionsData

	// Data contains constants, global variables, and configuration
	Data *extraction.DataData

	// Metadata about the extraction
	// NOTE(review): StartLine/EndLine presumably bound the extracted region;
	// whether they are 0- or 1-based is not visible here — confirm.
	Language  string
	FilePath  string
	StartLine int
	EndLine   int
}

CodeExtraction represents the three-tier extraction from a source code file.

type Config ¶

// Config contains configuration for the indexer. It is passed to New and
// NewWithProgress; DefaultConfig returns an instance with defaults.
type Config struct {
	// Root directory of the codebase to index
	RootDir string

	// Paths configuration
	// NOTE(review): presumably glob patterns, as FileDiscovery is documented
	// to use glob patterns and ignore rules — confirm the syntax.
	CodePatterns   []string
	DocsPatterns   []string
	IgnorePatterns []string

	// Chunking configuration
	// Note the mixed units: DocChunkSize and Overlap are in tokens,
	// CodeChunkSize is in characters.
	ChunkStrategies []string // ["symbols", "definitions", "data"]
	DocChunkSize    int      // tokens
	CodeChunkSize   int      // characters
	Overlap         int      // tokens

	// Output configuration
	OutputDir string // .cortex/chunks/

	// Embedding configuration
	// NOTE(review): which of EmbeddingEndpoint / EmbeddingBinary applies
	// presumably depends on EmbeddingProvider — confirm.
	EmbeddingProvider string
	EmbeddingModel    string
	EmbeddingDims     int
	EmbeddingEndpoint string
	EmbeddingBinary   string
}

Config contains configuration for the indexer.

func DefaultConfig ¶

func DefaultConfig(rootDir string) *Config

DefaultConfig returns a configuration with sensible defaults.

type DocumentationChunk ¶

// DocumentationChunk represents a chunk of documentation content. It is the
// element type returned by Chunker.ChunkDocument and the input to
// Formatter.FormatDocumentation.
type DocumentationChunk struct {
	// FilePath is the path of the file the chunk belongs to.
	FilePath         string
	// SectionIndex and ChunkIndex position the chunk.
	// NOTE(review): presumably section-within-file and chunk-within-section;
	// the base (0 or 1) is not visible here — confirm.
	SectionIndex     int
	ChunkIndex       int
	// Text is the chunk's content.
	Text             string
	// StartLine and EndLine give the chunk's line range.
	// NOTE(review): inclusivity and base are not visible here — confirm.
	StartLine        int
	EndLine          int
	// NOTE(review): these flags presumably mark a paragraph that exceeded the
	// target size and a chunk produced by splitting one — confirm.
	IsLargeParagraph bool
	IsSplitParagraph bool
}

DocumentationChunk represents a chunk of documentation content.

type FileDiscovery ¶

// FileDiscovery handles file discovery with glob patterns and ignore rules.
// All of its fields are unexported; NewFileDiscovery constructs an instance
// and DiscoverFiles returns the code and doc file lists.
type FileDiscovery struct {
	// contains filtered or unexported fields
}

FileDiscovery handles file discovery with glob patterns and ignore rules.

func NewFileDiscovery ¶

func NewFileDiscovery(rootDir string, codePatterns, docsPatterns, ignorePatterns []string) (*FileDiscovery, error)

NewFileDiscovery creates a new file discovery instance.

func (*FileDiscovery) DiscoverFiles ¶

func (fd *FileDiscovery) DiscoverFiles() (codeFiles []string, docFiles []string, err error)

DiscoverFiles walks the directory tree and returns code and doc files.

type Formatter ¶

// Formatter converts code extractions and doc chunks into natural language
// text. Each method returns a string and no error. NewFormatter returns an
// implementation of this interface.
type Formatter interface {
	// FormatSymbols converts SymbolsData into natural language text.
	FormatSymbols(data *extraction.SymbolsData, language string) string

	// FormatDefinitions converts DefinitionsData into formatted code with line comments.
	FormatDefinitions(data *extraction.DefinitionsData, language string) string

	// FormatData converts DataData into formatted code with line comments.
	FormatData(data *extraction.DataData, language string) string

	// FormatDocumentation formats a documentation chunk (may add context).
	FormatDocumentation(chunk *DocumentationChunk) string
}

Formatter converts code extractions and doc chunks into natural language text.

func NewFormatter ¶

func NewFormatter() Formatter

NewFormatter creates a new formatter instance.

type GeneratorMetadata ¶

// GeneratorMetadata tracks file checksums and processing stats for
// incremental indexing. AtomicWriter.ReadMetadata and
// AtomicWriter.WriteMetadata read and write it.
type GeneratorMetadata struct {
	// Version is a version string.
	// NOTE(review): unclear whether this is a format or tool version — confirm.
	Version       string               `json:"version"`
	// GeneratedAt is a timestamp serialized as "generated_at".
	GeneratedAt   time.Time            `json:"generated_at"`
	// FileChecksums and FileMtimes are maps with string keys.
	// NOTE(review): keys are presumably file paths; the checksum algorithm
	// is not visible here — confirm.
	FileChecksums map[string]string    `json:"file_checksums"`
	FileMtimes    map[string]time.Time `json:"file_mtimes"`
	// Stats embeds a ProcessingStats value by value, not by pointer.
	Stats         ProcessingStats      `json:"stats"`
}

GeneratorMetadata tracks file checksums and processing stats for incremental indexing.

type Indexer ¶

// Indexer provides the main interface for indexing codebase content.
// New and NewWithProgress return implementations of this interface.
type Indexer interface {
	// Index processes all files in the codebase and generates chunk files.
	// Returns statistics about the indexing process.
	Index(ctx context.Context) (*ProcessingStats, error)

	// IndexIncremental processes only changed files based on checksums.
	// Returns statistics about the indexing process.
	IndexIncremental(ctx context.Context) (*ProcessingStats, error)

	// Watch starts watching for file changes and reindexes incrementally.
	// Blocks until context is cancelled.
	// NOTE(review): the error returned on cancellation is not specified here — confirm.
	Watch(ctx context.Context) error

	// Close releases all resources held by the indexer.
	Close() error
}

Indexer provides the main interface for indexing codebase content.

func New ¶

func New(config *Config) (Indexer, error)

New creates a new indexer instance.

func NewWithProgress ¶

func NewWithProgress(config *Config, progress ProgressReporter) (Indexer, error)

NewWithProgress creates a new indexer instance with a custom progress reporter.

type IndexerWatcher ¶

// IndexerWatcher watches the root directory for file changes and triggers
// incremental reindexing. All of its fields are unexported;
// NewIndexerWatcher constructs an instance, and Start and Stop control it.
type IndexerWatcher struct {
	// contains filtered or unexported fields
}

IndexerWatcher watches the root directory for file changes and triggers incremental reindexing.

func NewIndexerWatcher ¶

func NewIndexerWatcher(idx Indexer, rootDir string) (*IndexerWatcher, error)

NewIndexerWatcher creates a new file watcher for the indexer.

func (*IndexerWatcher) Start ¶

func (iw *IndexerWatcher) Start(ctx context.Context)

Start begins watching for file changes.

func (*IndexerWatcher) Stop ¶

func (iw *IndexerWatcher) Stop()

Stop stops the file watcher.

type NoOpProgressReporter ¶

// NoOpProgressReporter is a progress reporter that does nothing. Used when
// progress reporting is disabled (e.g., --quiet flag). It is an empty struct;
// its methods are declared on the pointer receiver and mirror the eight
// callbacks of ProgressReporter.
type NoOpProgressReporter struct{}

NoOpProgressReporter is a progress reporter that does nothing. Used when progress reporting is disabled (e.g., --quiet flag).

func (*NoOpProgressReporter) OnComplete ¶

func (n *NoOpProgressReporter) OnComplete(stats *ProcessingStats)

func (*NoOpProgressReporter) OnDiscoveryComplete ¶

func (n *NoOpProgressReporter) OnDiscoveryComplete(codeFiles, docFiles int)

func (*NoOpProgressReporter) OnDiscoveryStart ¶

func (n *NoOpProgressReporter) OnDiscoveryStart()

func (*NoOpProgressReporter) OnEmbeddingProgress ¶

func (n *NoOpProgressReporter) OnEmbeddingProgress(processedChunks int)

func (*NoOpProgressReporter) OnEmbeddingStart ¶

func (n *NoOpProgressReporter) OnEmbeddingStart(totalChunks int)

func (*NoOpProgressReporter) OnFileProcessed ¶

func (n *NoOpProgressReporter) OnFileProcessed(fileName string)

func (*NoOpProgressReporter) OnFileProcessingStart ¶

func (n *NoOpProgressReporter) OnFileProcessingStart(totalFiles int)

func (*NoOpProgressReporter) OnWritingChunks ¶

func (n *NoOpProgressReporter) OnWritingChunks()

type Parser ¶

// Parser extracts structured information from source code files.
// NewParser returns an implementation documented as supporting all languages.
type Parser interface {
	// ParseFile extracts code structure from a source file.
	// Returns CodeExtraction containing symbols, definitions, and data.
	ParseFile(ctx context.Context, filePath string) (*CodeExtraction, error)

	// SupportsLanguage checks if this parser supports the given language.
	// NOTE(review): the expected spelling of language identifiers is not
	// visible here — confirm.
	SupportsLanguage(language string) bool
}

Parser extracts structured information from source code files.

func NewParser ¶

func NewParser() Parser

NewParser creates a new parser instance that supports all languages.

type ProcessingStats ¶

// ProcessingStats tracks statistics about the indexing process. It is
// returned by Indexer.Index and Indexer.IndexIncremental, passed to
// ProgressReporter.OnComplete, and embedded in GeneratorMetadata.
type ProcessingStats struct {
	// DocsProcessed and CodeFilesProcessed are integer counts of files.
	DocsProcessed         int     `json:"docs_processed"`
	CodeFilesProcessed    int     `json:"code_files_processed"`
	// TotalDocChunks and TotalCodeChunks are integer counts of chunks.
	TotalDocChunks        int     `json:"total_doc_chunks"`
	TotalCodeChunks       int     `json:"total_code_chunks"`
	// ProcessingTimeSeconds is a float64 duration, in seconds per its name
	// and JSON key.
	ProcessingTimeSeconds float64 `json:"processing_time_seconds"`
}

ProcessingStats tracks statistics about the indexing process.

type ProgressReporter ¶

// ProgressReporter provides callbacks for reporting indexing progress.
// Implementations can display progress bars, log messages, or remain silent.
// A reporter is supplied through NewWithProgress. None of the callbacks
// returns a value.
type ProgressReporter interface {
	// OnDiscoveryStart is called when file discovery begins.
	OnDiscoveryStart()

	// OnDiscoveryComplete is called when file discovery finishes.
	// It receives the number of code files and doc files as ints.
	OnDiscoveryComplete(codeFiles, docFiles int)

	// OnFileProcessingStart is called before processing files.
	OnFileProcessingStart(totalFiles int)

	// OnFileProcessed is called after each file is processed.
	OnFileProcessed(fileName string)

	// OnEmbeddingStart is called before generating embeddings.
	OnEmbeddingStart(totalChunks int)

	// OnEmbeddingProgress is called after each batch of embeddings.
	// NOTE(review): processedChunks is presumably a running total rather
	// than a per-batch count — confirm.
	OnEmbeddingProgress(processedChunks int)

	// OnWritingChunks is called when writing chunk files begins.
	OnWritingChunks()

	// OnComplete is called when indexing completes successfully.
	OnComplete(stats *ProcessingStats)
}

ProgressReporter provides callbacks for reporting indexing progress. Implementations can display progress bars, log messages, or remain silent.

Source Files ¶

View all Source files

Directories ¶

Path	Synopsis
extraction
parsers

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL