indexer

package

Version: v0.1.7 (latest) | Published: Mar 27, 2026 | License: MIT | Imports: 35 | Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/posit-dev/code-index

Links

Open Source Insights

Documentation ¶

Overview ¶

Copyright (C) 2026 by Posit Software, PBC ¶

Index ¶

Constants
func BuildEmbeddingText(name, signature, summary, doc, file string) string
func FunctionCacheKey(filePath, funcName, receiver string) string
func PrintStats(index *SearchIndex) string
func SaveCacheManifest(outputDir string, manifest *CacheManifest) error
func WriteIndex(index *SearchIndex, outputDir string) error
type AWSConfig
type BedrockCohereEmbedder
- func NewBedrockEmbedder(ctx context.Context, model, region string) (*BedrockCohereEmbedder, error)
- func (e *BedrockCohereEmbedder) EmbedDocument(ctx context.Context, text string) ([]float32, error)
- func (e *BedrockCohereEmbedder) EmbedQuery(ctx context.Context, text string) ([]float32, error)
- func (e *BedrockCohereEmbedder) Name() string
type BedrockLLMBackend
- func NewBedrockLLMBackend(region string) (*BedrockLLMBackend, error)
- func (b *BedrockLLMBackend) Call(model, prompt string) (string, error)
- func (b *BedrockLLMBackend) Name() string
type CParser
- func NewCParser(srcRoot string, excludes []string) *CParser
- func (p *CParser) Parse(result *ParseResult) error
type CacheManifest
- func LoadCacheManifest(outputDir string) (*CacheManifest, error)
- func NewCacheManifest() *CacheManifest
type DiffResult
- func ComputeDiff(parsed *ParseResult, cache *CacheManifest) *DiffResult
- func NewDiffResult() *DiffResult
type DocumentMetadata
type EmbedCache
- func LoadEmbedCache(outputDir string) (*EmbedCache, error)
- func NewEmbedCache() *EmbedCache
- func (c *EmbedCache) IsUpToDate(id, contentHash string) bool
- func (c *EmbedCache) Remove(id string)
- func (c *EmbedCache) Save(outputDir string) error
- func (c *EmbedCache) Set(id, contentHash string)
type Embedder
- func NewEmbedder(ctx context.Context, cfg EmbeddingsConfig, awsRegion string) (Embedder, error)
type EmbeddingsConfig
type FieldInfo
type FileCache
type FileEntry
type FileInfo
type FunctionCache
type FunctionEntry
type FunctionInfo
type GenerateStats
type Generator
- func NewGenerator(outputDir string, config *IndexConfig, dryRun bool, opts ...GeneratorOption) (*Generator, error)
- func (g *Generator) Generate(parsed *ParseResult, diff *DiffResult, cache *CacheManifest) (*GenerateStats, error)
type GeneratorOption
- func WithBackendOverride(backend string) GeneratorOption
- func WithMaxFiles(n int) GeneratorOption
- func WithVerbose(v bool) GeneratorOption
type IndexConfig
- func LoadConfig(repoRoot string) (*IndexConfig, error)
- func (c *IndexConfig) FunctionModel() string
- func (c *IndexConfig) SummaryModel() string
type LLMBackend
type LLMConfig
type MarkdownParser
- func NewMarkdownParser(srcRoot string, excludes []string) *MarkdownParser
- func (p *MarkdownParser) Parse(result *ParseResult) error
type OpenAIEmbedder
- func NewOpenAIEmbedder(model, baseURL, apiKeyEnv string) (*OpenAIEmbedder, error)
- func (e *OpenAIEmbedder) EmbedDocument(ctx context.Context, text string) ([]float32, error)
- func (e *OpenAIEmbedder) EmbedQuery(ctx context.Context, text string) ([]float32, error)
- func (e *OpenAIEmbedder) Name() string
type OpenAILLMBackend
- func NewOpenAILLMBackend(baseURL, apiKeyEnv string) (*OpenAILLMBackend, error)
- func (b *OpenAILLMBackend) Call(model, prompt string) (string, error)
- func (b *OpenAILLMBackend) Name() string
type PackageCache
type PackageEntry
type PackageInfo
type ParseResult
- func NewParseResult() *ParseResult
type Parser
- func NewParser(srcRoot, modulePrefix string) *Parser
- func NewParserWithConfig(srcRoot, modulePrefix string, excludes, vendorIncludes []string) *Parser
- func (p *Parser) Parse() (*ParseResult, error)
- func (p *Parser) ParseInto(result *ParseResult) error
type PythonParser
- func NewPythonParser(srcRoot string, excludes []string) *PythonParser
- func (p *PythonParser) Parse(result *ParseResult) error
type RConfig
type RParser
- func NewRParser(srcRoot string, excludes []string) *RParser
- func NewRParserWithConfig(srcRoot string, excludes []string, rscriptBin, repoRoot string) *RParser
- func (p *RParser) Parse(result *ParseResult) error
type SearchIndex
- func BuildIndex(parsed *ParseResult, outputDir string) (*SearchIndex, error)
type SearchResult
type SourceConfig
type StorageConfig
type TSParser
- func NewTSParser(srcRoot string, excludes []string) *TSParser
- func (p *TSParser) Parse(result *ParseResult) error
type TypeEntry
type TypeInfo
type VectorStore
- func OpenVectorStore(outputDir string, dimensions int) (*VectorStore, error)
- func (vs *VectorStore) AddDocument(ctx context.Context, id, content string, embedding []float32, meta DocumentMetadata) error
- func (vs *VectorStore) Close() error
- func (vs *VectorStore) Count() int
- func (vs *VectorStore) Dimensions() int
- func (vs *VectorStore) Reset(ctx context.Context) error
- func (vs *VectorStore) Search(ctx context.Context, queryEmbedding []float32, maxResults int) ([]SearchResult, error)

Constants ¶

View Source

const CacheDir = "docs"

CacheDir is the subdirectory name for cached docs.

Variables ¶

This section is empty.

Functions ¶

func BuildEmbeddingText ¶

func BuildEmbeddingText(name, signature, summary, doc, file string) string

BuildEmbeddingText constructs the text to embed for an entry.

func FunctionCacheKey ¶

func FunctionCacheKey(filePath, funcName, receiver string) string

FunctionCacheKey returns the cache key for a function.

func PrintStats ¶

func PrintStats(index *SearchIndex) string

PrintStats returns summary statistics for the index as a string.

func SaveCacheManifest ¶

func SaveCacheManifest(outputDir string, manifest *CacheManifest) error

SaveCacheManifest writes the cache manifest to the output directory.

func WriteIndex ¶

func WriteIndex(index *SearchIndex, outputDir string) error

WriteIndex writes the search index to a JSON file.

Types ¶

type BedrockCohereEmbedder ¶

type BedrockCohereEmbedder struct {
	// contains filtered or unexported fields
}

BedrockCohereEmbedder uses Cohere embedding models via AWS Bedrock.

func NewBedrockEmbedder ¶

func NewBedrockEmbedder(ctx context.Context, model, region string) (*BedrockCohereEmbedder, error)

NewBedrockEmbedder creates an embedder using a Bedrock embedding model.

func (*BedrockCohereEmbedder) EmbedDocument ¶

func (e *BedrockCohereEmbedder) EmbedDocument(ctx context.Context, text string) ([]float32, error)

func (*BedrockCohereEmbedder) EmbedQuery ¶

func (e *BedrockCohereEmbedder) EmbedQuery(ctx context.Context, text string) ([]float32, error)

func (*BedrockCohereEmbedder) Name ¶

func (e *BedrockCohereEmbedder) Name() string

type BedrockLLMBackend ¶

type BedrockLLMBackend struct {
	// contains filtered or unexported fields
}

BedrockLLMBackend uses Claude models on AWS Bedrock for LLM calls.

func NewBedrockLLMBackend ¶

func NewBedrockLLMBackend(region string) (*BedrockLLMBackend, error)

NewBedrockLLMBackend creates a Bedrock LLM backend.

func (*BedrockLLMBackend) Call ¶

func (b *BedrockLLMBackend) Call(model, prompt string) (string, error)

func (*BedrockLLMBackend) Name ¶

func (b *BedrockLLMBackend) Name() string

type CParser ¶

type CParser struct {
	// contains filtered or unexported fields
}

CParser extracts structured information from C/C++ source files using tree-sitter.

func (*CParser) Parse ¶

func (p *CParser) Parse(result *ParseResult) error

Parse walks the source tree and extracts all file, function, and type information.

type CacheManifest ¶

type CacheManifest struct {
	// Commit is the git commit SHA that was last indexed.
	Commit string `json:"commit"`
	// Functions tracks per-function cache state.
	Functions map[string]*FunctionCache `json:"functions"`
	// Files tracks per-file cache state.
	Files map[string]*FileCache `json:"files"`
	// Packages tracks per-package cache state.
	Packages map[string]*PackageCache `json:"packages"`
}

CacheManifest tracks what has been indexed for incremental updates.

func LoadCacheManifest ¶

func LoadCacheManifest(outputDir string) (*CacheManifest, error)

LoadCacheManifest reads the cache manifest from the output directory. Returns an empty manifest if the file doesn't exist.

func NewCacheManifest ¶

func NewCacheManifest() *CacheManifest

NewCacheManifest creates an empty cache manifest.

type DiffResult ¶

type DiffResult struct {
	// Functions that need doc regeneration, keyed by "file::FuncName".
	ChangedFunctions map[string]*FunctionInfo
	// Files that need doc regeneration.
	ChangedFiles map[string]*FileInfo
	// Packages that need doc regeneration.
	ChangedPackages map[string]*PackageInfo
	// Functions that were removed (clean up from cache).
	RemovedFunctions []string
	// Files that were removed.
	RemovedFiles []string
	// Packages that were removed.
	RemovedPackages []string
}

DiffResult describes what needs to be regenerated.

func ComputeDiff ¶

func ComputeDiff(parsed *ParseResult, cache *CacheManifest) *DiffResult

ComputeDiff compares the current parse result against the cache manifest and determines what needs to be regenerated.

func NewDiffResult ¶

func NewDiffResult() *DiffResult

NewDiffResult creates an empty diff result.

type DocumentMetadata ¶

type DocumentMetadata struct {
	Kind      string // "function", "type", "file", "package"
	Name      string
	Signature string
	File      string
	Line      int
	Receiver  string
	Package   string
	Summary   string
	Doc       string
}

DocumentMetadata holds the metadata stored alongside each vector.

type EmbedCache ¶

type EmbedCache struct {
	// Items maps document ID to the hash of the content that was embedded.
	Items map[string]string `json:"items"`
}

EmbedCache tracks which items have been embedded and with what content hash. This allows incremental updates — only items whose content has changed need to be re-embedded.

func LoadEmbedCache ¶

func LoadEmbedCache(outputDir string) (*EmbedCache, error)

LoadEmbedCache reads the embed cache from disk. Returns an empty cache if the file doesn't exist.

func NewEmbedCache ¶

func NewEmbedCache() *EmbedCache

NewEmbedCache creates an empty embed cache.

func (*EmbedCache) IsUpToDate ¶

func (c *EmbedCache) IsUpToDate(id, contentHash string) bool

IsUpToDate returns true if the item has already been embedded with the same content hash.

func (*EmbedCache) Remove ¶

func (c *EmbedCache) Remove(id string)

Remove deletes an item from the cache.

func (*EmbedCache) Save ¶

func (c *EmbedCache) Save(outputDir string) error

Save writes the embed cache to disk.

func (*EmbedCache) Set ¶

func (c *EmbedCache) Set(id, contentHash string)

Set records that an item has been embedded with the given content hash.

type Embedder ¶

type Embedder interface {
	// EmbedDocument generates an embedding for a document (for indexing).
	EmbedDocument(ctx context.Context, text string) ([]float32, error)
	// EmbedQuery generates an embedding for a search query.
	// Some models optimize differently for queries vs documents.
	EmbedQuery(ctx context.Context, text string) ([]float32, error)
	// Name returns a human-readable name for this embedder.
	Name() string
}

Embedder generates vector embeddings from text.

func NewEmbedder ¶

func NewEmbedder(ctx context.Context, cfg EmbeddingsConfig, awsRegion string) (Embedder, error)

NewEmbedder creates an embedder based on the provider configuration.

type EmbeddingsConfig ¶

type EmbeddingsConfig struct {
	// Provider: "bedrock" or "openai". Default: "bedrock".
	Provider string `json:"provider,omitempty"`
	// BaseURL is the API base URL (openai provider only). Default: "https://api.openai.com/v1".
	BaseURL string `json:"base_url,omitempty"`
	// APIKeyEnv is the env var name containing the API key (openai provider only). Default: "OPENAI_API_KEY".
	APIKeyEnv string `json:"api_key_env,omitempty"`
	// Model is the embedding model ID.
	Model string `json:"model,omitempty"`
}

EmbeddingsConfig defines the embedding provider and model.

type FieldInfo ¶

type FieldInfo struct {
	Name string `json:"name"`
	Type string `json:"type"`
	Tag  string `json:"tag,omitempty"`
	Doc  string `json:"doc,omitempty"`
}

FieldInfo holds information about a struct field.

type FileCache ¶

type FileCache struct {
	FuncDocHash   string    `json:"func_doc_hash"`
	LastGenerated time.Time `json:"last_generated"`
}

FileCache tracks cache state for a single file.

type FileInfo ¶

type FileInfo struct {
	// Path is the relative file path from the source root.
	Path string `json:"path"`
	// Package is the package or module name.
	Package string `json:"package"`
	// ImportPath is the full import path of the containing package.
	ImportPath string `json:"import_path"`
	// Doc is the file-level doc comment or description.
	Doc string `json:"doc,omitempty"`
	// Functions defined in this file.
	Functions []FunctionInfo `json:"functions,omitempty"`
	// Types defined in this file.
	Types []TypeInfo `json:"types,omitempty"`
	// ASTHash is a hash of all function and type hashes in this file.
	ASTHash string `json:"ast_hash"`
}

FileInfo holds parsed information about a single source file.

type FunctionCache ¶

type FunctionCache struct {
	ASTHash       string    `json:"ast_hash"`
	SigHash       string    `json:"sig_hash"`
	DocHash       string    `json:"doc_hash"`
	LastGenerated time.Time `json:"last_generated"`
}

FunctionCache tracks cache state for a single function.

type FunctionEntry ¶

type FunctionEntry struct {
	Name      string `json:"name"`
	Receiver  string `json:"receiver,omitempty"`
	Signature string `json:"signature"`
	Doc       string `json:"doc,omitempty"`
	Summary   string `json:"summary,omitempty"`
	File      string `json:"file"`
	Line      int    `json:"line"`
	Exported  bool   `json:"exported"`
}

FunctionEntry is a function in the search index.

type FunctionInfo ¶

type FunctionInfo struct {
	// Name is the function name.
	Name string `json:"name"`
	// Receiver is the receiver type for methods (empty for functions).
	Receiver string `json:"receiver,omitempty"`
	// Signature is the full function signature.
	Signature string `json:"signature"`
	// Doc is the documentation comment.
	Doc string `json:"doc,omitempty"`
	// File is the relative file path.
	File string `json:"file"`
	// Line is the starting line number.
	Line int `json:"line"`
	// Exported indicates whether the function is exported/public.
	Exported bool `json:"exported"`
	// ASTHash is a hash of the normalized function AST.
	ASTHash string `json:"ast_hash"`
	// SigHash is a hash of just the function signature (params + returns).
	SigHash string `json:"sig_hash"`
}

FunctionInfo holds parsed information about a function or method.

type GenerateStats ¶

type GenerateStats struct {
	FunctionsGenerated int
	FilesGenerated     int
	PackagesGenerated  int
	FunctionsSkipped   int
	FilesSkipped       int
	PackagesSkipped    int
	FunctionsRemoved   int
	FilesRemoved       int
	PackagesRemoved    int
}

GenerateStats tracks generation statistics.

type Generator ¶

type Generator struct {
	// contains filtered or unexported fields
}

Generator handles LLM-based doc generation with caching.

func NewGenerator ¶

func NewGenerator(outputDir string, config *IndexConfig, dryRun bool, opts ...GeneratorOption) (*Generator, error)

NewGenerator creates a new doc generator. Config provides model IDs and AWS settings. Use opts to override behavior.

func (*Generator) Generate ¶

func (g *Generator) Generate(parsed *ParseResult, diff *DiffResult, cache *CacheManifest) (*GenerateStats, error)

Generate produces LLM summaries for all changed items in the diff. It updates the cache manifest and writes doc files.

func WithBackendOverride ¶

func WithBackendOverride(backend string) GeneratorOption

WithBackendOverride forces a specific LLM backend.

func WithMaxFiles ¶

func WithMaxFiles(n int) GeneratorOption

WithMaxFiles limits the number of files to process.

func WithVerbose ¶

func WithVerbose(v bool) GeneratorOption

WithVerbose enables verbose logging of LLM calls.

type IndexConfig ¶

type IndexConfig struct {
	// Project name (used in descriptions and logging).
	Project string `json:"project,omitempty"`
	// Sources to index.
	Sources []SourceConfig `json:"sources"`
	// LLM configuration for doc generation.
	LLM LLMConfig `json:"llm"`
	// Embeddings configuration.
	Embeddings EmbeddingsConfig `json:"embeddings"`
	// Storage configuration for vector distribution.
	Storage StorageConfig `json:"storage"`
	// AWS configuration.
	AWS AWSConfig `json:"aws"`
	// R configuration for the native R parser.
	R RConfig `json:"r,omitempty"`
}

IndexConfig defines what to index and how.

func LoadConfig ¶

func LoadConfig(repoRoot string) (*IndexConfig, error)

LoadConfig reads the config from .code-index.json. Returns an error if the config file is not found — every project must have one.

func (*IndexConfig) FunctionModel ¶

func (c *IndexConfig) FunctionModel() string

FunctionModel returns the model ID for function-level doc generation. Must be configured in .code-index.json under llm.function_model.

func (*IndexConfig) SummaryModel ¶

func (c *IndexConfig) SummaryModel() string

SummaryModel returns the model ID for file/package doc generation. Must be configured in .code-index.json under llm.summary_model.

type LLMBackend ¶

type LLMBackend interface {
	// Call sends a prompt to the LLM and returns the text response.
	// model is the full model ID as specified in .code-index.json config.
	Call(model, prompt string) (string, error)
	// Name returns a human-readable name for this backend.
	Name() string
}

LLMBackend is the interface for calling an LLM to generate summaries.

type LLMConfig ¶

type LLMConfig struct {
	// Provider: "bedrock" or "openai". Default: "bedrock".
	Provider string `json:"provider,omitempty"`
	// BaseURL is the API base URL (openai provider only). Default: "https://api.openai.com/v1".
	BaseURL string `json:"base_url,omitempty"`
	// APIKeyEnv is the env var name containing the API key (openai provider only). Default: "OPENAI_API_KEY".
	APIKeyEnv string `json:"api_key_env,omitempty"`
	// FunctionModel is the model for function-level summaries (high volume, fast).
	FunctionModel string `json:"function_model,omitempty"`
	// SummaryModel is the model for file and package summaries (higher quality).
	SummaryModel string `json:"summary_model,omitempty"`
}

LLMConfig defines the LLM provider and model settings for doc generation.

type MarkdownParser ¶

type MarkdownParser struct {
	// contains filtered or unexported fields
}

MarkdownParser extracts structured information from Markdown and Quarto files using regex/line-based parsing. Each heading becomes a searchable section (mapped to FunctionInfo), and YAML front matter provides file-level metadata.

func NewMarkdownParser ¶

func NewMarkdownParser(srcRoot string, excludes []string) *MarkdownParser

NewMarkdownParser creates a new Markdown/Quarto parser.

func (*MarkdownParser) Parse ¶

func (p *MarkdownParser) Parse(result *ParseResult) error

Parse walks the source tree and extracts all file and section information.

type OpenAIEmbedder ¶

type OpenAIEmbedder struct {
	// contains filtered or unexported fields
}

OpenAIEmbedder uses any OpenAI-compatible embeddings API. Works with OpenAI, Ollama, Together AI, LM Studio, vLLM, etc.

func NewOpenAIEmbedder ¶

func NewOpenAIEmbedder(model, baseURL, apiKeyEnv string) (*OpenAIEmbedder, error)

NewOpenAIEmbedder creates an embedder using the OpenAI embeddings API.

func (*OpenAIEmbedder) EmbedDocument ¶

func (e *OpenAIEmbedder) EmbedDocument(ctx context.Context, text string) ([]float32, error)

EmbedDocument and EmbedQuery produce identical embeddings — the OpenAI embeddings API has no document/query type distinction.

func (*OpenAIEmbedder) EmbedQuery ¶

func (e *OpenAIEmbedder) EmbedQuery(ctx context.Context, text string) ([]float32, error)

func (*OpenAIEmbedder) Name ¶

func (e *OpenAIEmbedder) Name() string

type OpenAILLMBackend ¶

type OpenAILLMBackend struct {
	// contains filtered or unexported fields
}

OpenAILLMBackend uses any OpenAI-compatible API for LLM calls. Works with OpenAI, Ollama, Together AI, Groq, Fireworks, LM Studio, vLLM, etc.

func NewOpenAILLMBackend ¶

func NewOpenAILLMBackend(baseURL, apiKeyEnv string) (*OpenAILLMBackend, error)

NewOpenAILLMBackend creates an LLM backend using the OpenAI chat/completions API.

func (*OpenAILLMBackend) Call ¶

func (b *OpenAILLMBackend) Call(model, prompt string) (string, error)

func (*OpenAILLMBackend) Name ¶

func (b *OpenAILLMBackend) Name() string

type PackageCache ¶

type PackageCache struct {
	FileDocHash   string    `json:"file_doc_hash"`
	LastGenerated time.Time `json:"last_generated"`
}

PackageCache tracks cache state for a single package.

type PackageEntry ¶

type PackageEntry struct {
	ImportPath string `json:"import_path"`
	Dir        string `json:"dir"`
	Doc        string `json:"doc,omitempty"`
	Summary    string `json:"summary,omitempty"`
	FileCount  int    `json:"file_count"`
}

PackageEntry is a package in the search index.

type PackageInfo ¶

type PackageInfo struct {
	// ImportPath is the full import path or directory path.
	ImportPath string `json:"import_path"`
	// Dir is the relative directory path from the source root.
	Dir string `json:"dir"`
	// Doc is the package-level doc comment.
	Doc string `json:"doc,omitempty"`
	// Files in this package (relative paths).
	Files []string `json:"files"`
	// ASTHash is a hash of the package's structural content.
	ASTHash string `json:"ast_hash"`
}

PackageInfo holds parsed information about a package or module.

type ParseResult ¶

type ParseResult struct {
	// Packages keyed by import path.
	Packages map[string]*PackageInfo `json:"packages"`
	// Files keyed by relative path.
	Files map[string]*FileInfo `json:"files"`
	// Timestamp of when parsing was performed.
	ParsedAt time.Time `json:"parsed_at"`
}

ParseResult holds the complete result of parsing the source tree.

func NewParseResult ¶

func NewParseResult() *ParseResult

NewParseResult creates an empty parse result.

type Parser ¶

type Parser struct {
	// contains filtered or unexported fields
}

Parser extracts structured information from Go source files using AST analysis.

func NewParser ¶

func NewParser(srcRoot, modulePrefix string) *Parser

NewParser creates a new AST parser for the given source root. modulePrefix is the Go module name (e.g., "myproject").

func NewParserWithConfig ¶

func NewParserWithConfig(srcRoot, modulePrefix string, excludes, vendorIncludes []string) *Parser

NewParserWithConfig creates a parser with explicit excludes and vendor includes.

func (*Parser) Parse ¶

func (p *Parser) Parse() (*ParseResult, error)

Parse walks the source tree and extracts all package, file, function, and type information. Returns a new ParseResult.

func (*Parser) ParseInto ¶

func (p *Parser) ParseInto(result *ParseResult) error

ParseInto walks the source tree and adds results to an existing ParseResult. This allows multiple parsers to contribute to the same result.

type PythonParser ¶

type PythonParser struct {
	// contains filtered or unexported fields
}

PythonParser extracts structured information from Python source files using tree-sitter.

func NewPythonParser ¶

func NewPythonParser(srcRoot string, excludes []string) *PythonParser

NewPythonParser creates a new Python parser.

func (*PythonParser) Parse ¶

func (p *PythonParser) Parse(result *ParseResult) error

Parse walks the source tree and extracts all file, function, and type information.

type RConfig ¶

type RConfig struct {
	// Executable is the path to the Rscript binary. If empty, Rscript is looked up in PATH.
	Executable string `json:"executable,omitempty"`
}

RConfig defines R-specific settings for native parsing.

type RParser ¶

type RParser struct {
	// contains filtered or unexported fields
}

RParser extracts structured information from R source files. It uses R's native parser (via Rscript) for accurate parsing when available, falling back to regex-based extraction otherwise.

func NewRParser ¶

func NewRParser(srcRoot string, excludes []string) *RParser

NewRParser creates a new R parser.

func NewRParserWithConfig ¶

func NewRParserWithConfig(srcRoot string, excludes []string, rscriptBin, repoRoot string) *RParser

NewRParserWithConfig creates a new R parser with an explicit Rscript path and repo root. If rscriptBin is empty, Rscript is looked up in PATH. If repoRoot is empty, it defaults to the parent of srcRoot.

func (*RParser) Parse ¶

func (p *RParser) Parse(result *ParseResult) error

Parse walks the source tree and extracts all file, function, and type information.

type SearchIndex ¶

type SearchIndex struct {
	// Packages with their summaries.
	Packages []PackageEntry `json:"packages"`
	// Files with their summaries.
	Files []FileEntry `json:"files"`
	// Functions with their summaries and signatures.
	Functions []FunctionEntry `json:"functions"`
	// Types with their summaries.
	Types []TypeEntry `json:"types"`
}

SearchIndex is the searchable index structure written for the MCP tool.

func BuildIndex ¶

func BuildIndex(parsed *ParseResult, outputDir string) (*SearchIndex, error)

BuildIndex constructs the searchable index from parsed results and cached docs.

type SearchResult ¶

type SearchResult struct {
	ID         string
	Content    string
	Similarity float32
	Metadata   map[string]string
}

SearchResult is a single result from a vector search.

type SourceConfig ¶

type SourceConfig struct {
	// Path is the directory to scan, relative to the repo root.
	Path string `json:"path"`
	// Language overrides auto-detection. Values: "go", "typescript", "javascript".
	Language string `json:"language,omitempty"`
	// ImportPrefix is the Go module import prefix.
	// Only used for Go sources. Auto-detected from go.mod if empty.
	ImportPrefix string `json:"import_prefix,omitempty"`
	// VendorInclude lists vendored Go module paths to include.
	VendorInclude []string `json:"vendor_include,omitempty"`
	// Exclude lists glob patterns of files/dirs to skip.
	Exclude []string `json:"exclude,omitempty"`
}

SourceConfig defines a single source to index.

type StorageConfig ¶

type StorageConfig struct {
	// URL is an HTTPS endpoint serving the vector database tarball.
	// Works with any hosting: GitHub Releases, GCS, Azure Blob, CDNs, etc.
	URL string `json:"url,omitempty"`
	// AuthTokenEnv is the env var name containing a bearer token for authenticated downloads.
	// Optional — only needed for private endpoints. The token is sent as "Authorization: Bearer $TOKEN".
	AuthTokenEnv string `json:"auth_token_env,omitempty"`
	// S3Bucket is the S3 bucket name (AWS-specific, uses SDK credential chain).
	S3Bucket string `json:"s3_bucket,omitempty"`
	// S3Prefix is the key prefix within the bucket.
	S3Prefix string `json:"s3_prefix,omitempty"`
}

StorageConfig defines where the vector database is hosted for team distribution. The download method is auto-detected: if URL is set, HTTP download is used; if S3Bucket is set, AWS S3 is used.

type TSParser ¶

type TSParser struct {
	// contains filtered or unexported fields
}

TSParser extracts structured information from TypeScript/JavaScript source files using tree-sitter.

func NewTSParser ¶

func NewTSParser(srcRoot string, excludes []string) *TSParser

NewTSParser creates a new TypeScript/JavaScript parser.

func (*TSParser) Parse ¶

func (p *TSParser) Parse(result *ParseResult) error

Parse walks the source tree and extracts all file, function, and type information.

type TypeEntry ¶

type TypeEntry struct {
	Name     string      `json:"name"`
	Kind     string      `json:"kind"`
	Doc      string      `json:"doc,omitempty"`
	Summary  string      `json:"summary,omitempty"`
	File     string      `json:"file"`
	Line     int         `json:"line"`
	Exported bool        `json:"exported"`
	Fields   []FieldInfo `json:"fields,omitempty"`
}

TypeEntry is a type in the search index.

type TypeInfo ¶

type TypeInfo struct {
	// Name is the type name.
	Name string `json:"name"`
	// Kind describes the type (e.g., "struct", "interface", "class", "enum", "typedef").
	Kind string `json:"kind"`
	// Doc is the documentation comment.
	Doc string `json:"doc,omitempty"`
	// File is the relative file path.
	File string `json:"file"`
	// Line is the starting line number.
	Line int `json:"line"`
	// Exported indicates whether the type is exported.
	Exported bool `json:"exported"`
	// Methods associated with this type (populated during parsing).
	Methods []FunctionInfo `json:"methods,omitempty"`
	// Fields for struct types.
	Fields []FieldInfo `json:"fields,omitempty"`
	// ASTHash is a hash of the normalized type AST.
	ASTHash string `json:"ast_hash"`
}

TypeInfo holds parsed information about a type declaration.

type VectorStore ¶

type VectorStore struct {
	// contains filtered or unexported fields
}

VectorStore manages the persistent vector database for code search.

func OpenVectorStore ¶

func OpenVectorStore(outputDir string, dimensions int) (*VectorStore, error)

OpenVectorStore opens or creates the vector database at the given path. dimensions is the embedding vector size (e.g., 1536 for Cohere, 768 for nomic-embed-text). If dimensions is 0 and the database already exists, the stored dimension is used. If dimensions is non-zero and differs from the stored value, an error is returned.

func (*VectorStore) AddDocument ¶

func (vs *VectorStore) AddDocument(ctx context.Context, id, content string, embedding []float32, meta DocumentMetadata) error

AddDocument adds a single document with a pre-computed embedding.

func (*VectorStore) Close ¶

func (vs *VectorStore) Close() error

Close closes the database connection.

func (*VectorStore) Count ¶

func (vs *VectorStore) Count() int

Count returns the number of documents in the store.

func (*VectorStore) Dimensions ¶

func (vs *VectorStore) Dimensions() int

Dimensions returns the embedding dimension size for this store.

func (*VectorStore) Reset ¶

func (vs *VectorStore) Reset(ctx context.Context) error

Reset deletes and recreates the database.

func (*VectorStore) Search ¶

func (vs *VectorStore) Search(ctx context.Context, queryEmbedding []float32, maxResults int) ([]SearchResult, error)

Search finds the most similar documents to the given embedding.

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL

Documentation ¶

Overview ¶

Copyright (C) 2026 by Posit Software, PBC ¶

Copyright (C) 2026 by Posit Software, PBC ¶

Copyright (C) 2026 by Posit Software, PBC ¶

Copyright (C) 2026 by Posit Software, PBC ¶

Copyright (C) 2026 by Posit Software, PBC ¶

Copyright (C) 2026 by Posit Software, PBC ¶

Copyright (C) 2026 by Posit Software, PBC ¶

Copyright (C) 2026 by Posit Software, PBC ¶

Copyright (C) 2026 by Posit Software, PBC ¶

Copyright (C) 2026 by Posit Software, PBC ¶

Copyright (C) 2026 by Posit Software, PBC ¶

Copyright (C) 2026 by Posit Software, PBC ¶

Copyright (C) 2026 by Posit Software, PBC ¶

Copyright (C) 2026 by Posit Software, PBC ¶

Index ¶

Constants ¶

Variables ¶

Functions ¶

func BuildEmbeddingText ¶

func FunctionCacheKey ¶

func PrintStats ¶

func SaveCacheManifest ¶

func WriteIndex ¶

Types ¶

type AWSConfig ¶

type BedrockCohereEmbedder ¶

func NewBedrockEmbedder ¶

func (*BedrockCohereEmbedder) EmbedDocument ¶

func (*BedrockCohereEmbedder) EmbedQuery ¶

func (*BedrockCohereEmbedder) Name ¶

type BedrockLLMBackend ¶

func NewBedrockLLMBackend ¶

func (*BedrockLLMBackend) Call ¶

func (*BedrockLLMBackend) Name ¶

type CParser ¶

func NewCParser ¶

func (*CParser) Parse ¶

type CacheManifest ¶

func LoadCacheManifest ¶

func NewCacheManifest ¶

type DiffResult ¶

func ComputeDiff ¶

func NewDiffResult ¶

type DocumentMetadata ¶

type EmbedCache ¶

func LoadEmbedCache ¶

func NewEmbedCache ¶

func (*EmbedCache) IsUpToDate ¶

func (*EmbedCache) Remove ¶

func (*EmbedCache) Save ¶

func (*EmbedCache) Set ¶

type Embedder ¶

func NewEmbedder ¶

type EmbeddingsConfig ¶

type FieldInfo ¶

type FileCache ¶

type FileEntry ¶

type FileInfo ¶

type FunctionCache ¶

type FunctionEntry ¶

type FunctionInfo ¶

type GenerateStats ¶

type Generator ¶

func NewGenerator ¶

func (*Generator) Generate ¶

type GeneratorOption ¶

func WithBackendOverride ¶

func WithMaxFiles ¶

func WithVerbose ¶

type IndexConfig ¶

func LoadConfig ¶

func (*IndexConfig) FunctionModel ¶

func (*IndexConfig) SummaryModel ¶

type LLMBackend ¶

type LLMConfig ¶

type MarkdownParser ¶

func NewMarkdownParser ¶

func (*MarkdownParser) Parse ¶