indexer

package
v0.0.0-...-16efc32 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 28, 2026 License: MIT Imports: 16 Imported by: 0

Documentation

Index

Constants

View Source
// Default chunking parameters and the character-per-token heuristic.
const (
	// DefaultChunkSize is the default chunk size. The unit is presumably
	// tokens (see CharsPerToken) — TODO confirm against NewChunker.
	DefaultChunkSize    = 512
	// DefaultChunkOverlap is the default chunk overlap, presumably expressed
	// in the same unit as DefaultChunkSize — TODO confirm.
	DefaultChunkOverlap = 50
	CharsPerToken       = 4 // Approximation: 4 chars ≈ 1 token for code
)

Variables

View Source
// MinifiedPatterns holds filename fragments for minified or bundled
// JavaScript and CSS files.
// NOTE(review): how an entry is matched against a path (suffix, substring,
// or glob) is not visible here — confirm at the use site.
var MinifiedPatterns = []string{
	".min.js",
	".min.css",
	".bundle.js",
	".bundle.css",
}

MinifiedPatterns lists filename patterns that identify minified files; matching files are skipped by default.

View Source
// SupportedExtensions is keyed by file extension, including the leading dot.
// Every extension listed here maps to true.
// NOTE(review): both ".r" and ".R" are listed, which suggests lookups are
// case-sensitive — confirm at the use site before relying on other
// mixed-case extensions being matched.
var SupportedExtensions = map[string]bool{
	".go":     true,
	".js":     true,
	".ts":     true,
	".jsx":    true,
	".tsx":    true,
	".py":     true,
	".rb":     true,
	".java":   true,
	".c":      true,
	".cpp":    true,
	".cc":     true,
	".h":      true,
	".hpp":    true,
	".cs":     true,
	".php":    true,
	".rs":     true,
	".swift":  true,
	".kt":     true,
	".scala":  true,
	".vue":    true,
	".svelte": true,
	".html":   true,
	".css":    true,
	".scss":   true,
	".less":   true,
	".sql":    true,
	".sh":     true,
	".bash":   true,
	".zsh":    true,
	".yaml":   true,
	".yml":    true,
	".json":   true,
	".xml":    true,
	".md":     true,
	".txt":    true,
	".toml":   true,
	".ini":    true,
	".cfg":    true,
	".conf":   true,
	".env":    true,
	".lua":    true,
	".r":      true,
	".R":      true,
	".dart":   true,
	".ex":     true,
	".exs":    true,
	".erl":    true,
	".clj":    true,
	".hs":     true,
	".ml":     true,
	".fs":     true,
	".elm":    true,
	".nim":    true,
	".zig":    true,
	".proto":  true,
	".tf":     true,
	".hcl":    true,
	".pas":    true,
	".dpr":    true,
}

SupportedExtensions is the set of file extensions (each including its leading dot) that are indexed.

Functions

func AddToGitignore

func AddToGitignore(projectRoot string, pattern string) error

AddToGitignore appends the given pattern to .gitignore if it is not already present.

func HashFile

func HashFile(path string) (string, error)

Types

type BatchProgressCallback

// BatchProgressCallback receives a BatchProgressInfo value. It is the type of
// the onBatchProgress parameter of Indexer.IndexAllWithBatchProgress.
type BatchProgressCallback func(info BatchProgressInfo)

BatchProgressCallback is called for batch embedding progress and retry visibility

type BatchProgressInfo

// BatchProgressInfo is the value passed to a BatchProgressCallback. It carries
// batch and chunk counters plus retry details (Retrying, Attempt, StatusCode).
type BatchProgressInfo struct {
	BatchIndex      int  // Current batch index (0-indexed)
	TotalBatches    int  // Total number of batches
	CompletedChunks int  // Number of chunks completed so far
	TotalChunks     int  // Total number of chunks to embed
	Retrying        bool // True if this is a retry attempt
	Attempt         int  // Retry attempt number (1-indexed, 0 if not retrying)
	StatusCode      int  // HTTP status code when retrying (429 = rate limited, 5xx = server error)
}

BatchProgressInfo contains progress information for batch embedding

type ChunkInfo

// ChunkInfo describes one chunk of a file. Chunker.Chunk and
// Chunker.ChunkWithContext return slices of this type.
type ChunkInfo struct {
	// ID identifies the chunk. How it is derived is not visible here.
	ID        string
	// FilePath is the path of the file the chunk belongs to.
	// NOTE(review): presumably the filePath passed to Chunk — confirm.
	FilePath  string
	// StartLine is the line where the chunk starts.
	// NOTE(review): indexing base (0 or 1) is not visible here — confirm.
	StartLine int
	// EndLine is the line where the chunk ends.
	// NOTE(review): whether this bound is inclusive is not visible — confirm.
	EndLine   int
	// Content is the chunk text.
	Content   string
	// Hash is a hash associated with the chunk.
	// NOTE(review): presumably a hash of Content — confirm.
	Hash      string
}

type Chunker

type Chunker struct {
	// contains filtered or unexported fields
}

func NewChunker

func NewChunker(chunkSize, overlap int) *Chunker

func (*Chunker) Chunk

func (c *Chunker) Chunk(filePath string, content string) []ChunkInfo

func (*Chunker) ChunkWithContext

func (c *Chunker) ChunkWithContext(filePath string, content string) []ChunkInfo

ChunkWithContext adds surrounding context to improve embedding quality

type FileInfo

// FileInfo describes a single file. Scanner.Scan and Scanner.ScanFile return
// values of this type, and Indexer.IndexFile accepts one.
type FileInfo struct {
	// Path is the file path.
	// NOTE(review): presumably relative to the scanner root (ScanFile takes a
	// relPath) — confirm.
	Path    string
	// Size is the file size, presumably in bytes — TODO confirm.
	Size    int64
	// ModTime is the modification time as an integer.
	// NOTE(review): presumably a Unix timestamp; resolution is not visible here — confirm.
	ModTime int64
	// Hash is a hash associated with the file.
	// NOTE(review): presumably a content hash as produced by HashFile — confirm.
	Hash    string
	// Content is the file content as a string.
	Content string
}

type IgnoreMatcher

type IgnoreMatcher struct {
	// contains filtered or unexported fields
}

func NewIgnoreMatcher

func NewIgnoreMatcher(projectRoot string, extraIgnore []string, externalGitignore string) (*IgnoreMatcher, error)

func (*IgnoreMatcher) ShouldIgnore

func (m *IgnoreMatcher) ShouldIgnore(path string) bool

type IndexStats

// IndexStats holds the counters and duration returned by the Indexer.IndexAll
// family of methods.
type IndexStats struct {
	// FilesIndexed counts the files that were indexed.
	FilesIndexed  int
	// FilesSkipped counts the files that were skipped.
	// NOTE(review): presumably files that did not need reindexing — confirm.
	FilesSkipped  int
	// ChunksCreated counts the chunks that were created.
	ChunksCreated int
	// FilesRemoved counts the files that were removed.
	// NOTE(review): presumably removed from the index — confirm.
	FilesRemoved  int
	// Duration is an elapsed time.
	// NOTE(review): presumably the wall-clock time of the whole run — confirm.
	Duration      time.Duration
}

type Indexer

type Indexer struct {
	// contains filtered or unexported fields
}

func NewIndexer

func NewIndexer(
	root string,
	st store.VectorStore,
	emb embedder.Embedder,
	chunker *Chunker,
	scanner *Scanner,
	lastIndexTime time.Time,
) *Indexer

func (*Indexer) IndexAll

func (idx *Indexer) IndexAll(ctx context.Context) (*IndexStats, error)

IndexAll performs a full index of the project (no progress reporting)

func (*Indexer) IndexAllWithBatchProgress

func (idx *Indexer) IndexAllWithBatchProgress(ctx context.Context, onProgress ProgressCallback, onBatchProgress BatchProgressCallback) (*IndexStats, error)

IndexAllWithBatchProgress performs a full index with both file and batch progress reporting. When the embedder implements BatchEmbedder, files are processed in parallel using cross-file batching.

func (*Indexer) IndexAllWithProgress

func (idx *Indexer) IndexAllWithProgress(ctx context.Context, onProgress ProgressCallback) (*IndexStats, error)

IndexAllWithProgress performs a full index with progress reporting

func (*Indexer) IndexFile

func (idx *Indexer) IndexFile(ctx context.Context, file FileInfo) (int, error)

IndexFile indexes a single file

func (*Indexer) NeedsReindex

func (idx *Indexer) NeedsReindex(ctx context.Context, path string, hash string) (bool, error)

NeedsReindex checks if a file needs reindexing

func (*Indexer) RemoveFile

func (idx *Indexer) RemoveFile(ctx context.Context, path string) error

RemoveFile removes a file from the index

type ProgressCallback

// ProgressCallback receives a ProgressInfo value. It is the type of the
// onProgress parameter of Indexer.IndexAllWithProgress and
// Indexer.IndexAllWithBatchProgress.
type ProgressCallback func(info ProgressInfo)

ProgressCallback is called for each file during indexing

type ProgressInfo

// ProgressInfo is the value passed to a ProgressCallback. It carries the
// position of the current file, the total file count, and the file's path.
type ProgressInfo struct {
	Current     int    // Current file number (1-indexed)
	Total       int    // Total number of files
	CurrentFile string // Path of current file being processed
}

ProgressInfo contains progress information for indexing

type Scanner

type Scanner struct {
	// contains filtered or unexported fields
}

func NewScanner

func NewScanner(root string, ignore *IgnoreMatcher) *Scanner

func (*Scanner) Scan

func (s *Scanner) Scan() ([]FileInfo, []string, error)

func (*Scanner) ScanFile

func (s *Scanner) ScanFile(relPath string) (*FileInfo, error)

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL