Documentation
¶
Overview ¶
Package search provides search domain types for hybrid code retrieval.
Index ¶
- Constants
- func EmbeddingFrom(q repository.Query) ([]float64, bool)
- func QueryFrom(q repository.Query) (string, bool)
- func SnippetIDsFrom(q repository.Query) []string
- func WithEmbedding(embedding []float64) repository.Option
- func WithFilters(filters Filters) repository.Option
- func WithQuery(query string) repository.Option
- func WithSnippetID(id string) repository.Option
- func WithSnippetIDs(ids []string) repository.Option
- type BM25Store
- type BatchError
- type BatchProgress
- type Document
- type Embedder
- type Embedding
- type EmbeddingStore
- type Filters
- func (f Filters) Authors() []string
- func (f Filters) CommitSHAs() []string
- func (f Filters) CreatedAfter() time.Time
- func (f Filters) CreatedBefore() time.Time
- func (f Filters) EnrichmentSubtypes() []string
- func (f Filters) EnrichmentTypes() []string
- func (f Filters) FilePaths() []string
- func (f Filters) IsEmpty() bool
- func (f Filters) Languages() []string
- func (f Filters) SourceRepos() []int64
- func (f Filters) With(opts ...FiltersOption) Filters
- type FiltersOption
- func WithAuthors(authors []string) FiltersOption
- func WithCommitSHAs(shas []string) FiltersOption
- func WithCreatedAfter(t time.Time) FiltersOption
- func WithCreatedBefore(t time.Time) FiltersOption
- func WithEnrichmentSubtypes(subtypes []string) FiltersOption
- func WithEnrichmentTypes(types []string) FiltersOption
- func WithFilePaths(paths []string) FiltersOption
- func WithLanguages(languages []string) FiltersOption
- func WithSourceRepos(repos []int64) FiltersOption
- type Fusion
- type FusionRequest
- type FusionResult
- type IndexConfig
- type IndexOption
- type IndexRequest
- type MultiRequest
- type Query
- type Result
- type TokenBudget
- type Type
Constants ¶
const MaxSnippetIDsPerFind = 1000
MaxSnippetIDsPerFind is the maximum number of snippet IDs per Find call, keeping the number of IN-clause bind parameters within PostgreSQL's limit of 65535 parameters per statement.
Variables ¶
This section is empty.
Functions ¶
func EmbeddingFrom ¶
func EmbeddingFrom(q repository.Query) ([]float64, bool)
EmbeddingFrom extracts the embedding vector from a built query.
func QueryFrom ¶
func QueryFrom(q repository.Query) (string, bool)
QueryFrom extracts the search query text from a built query.
func SnippetIDsFrom ¶
func SnippetIDsFrom(q repository.Query) []string
SnippetIDsFrom extracts snippet IDs from conditions on a built query.
func WithEmbedding ¶
func WithEmbedding(embedding []float64) repository.Option
WithEmbedding passes a pre-computed embedding vector through options.
func WithFilters ¶
func WithFilters(filters Filters) repository.Option
WithFilters passes search filters through the option system.
func WithQuery ¶
func WithQuery(query string) repository.Option
WithQuery passes a search query string through options.
func WithSnippetID ¶
func WithSnippetID(id string) repository.Option
WithSnippetID filters by a single snippet ID.
func WithSnippetIDs ¶
func WithSnippetIDs(ids []string) repository.Option
WithSnippetIDs filters by multiple snippet IDs.
Types ¶
type BM25Store ¶
type BM25Store interface {
// Index adds documents to the BM25 index.
Index(ctx context.Context, request IndexRequest) error
// Find performs BM25 keyword search using options.
// Query text must be passed via WithQuery.
Find(ctx context.Context, options ...repository.Option) ([]Result, error)
// DeleteBy removes documents matching the given options.
DeleteBy(ctx context.Context, options ...repository.Option) error
}
BM25Store defines operations for BM25 full-text search indexing.
type BatchError ¶
BatchError is called when a batch fails during indexing. batchStart and batchEnd are the document offsets of the failed batch; err is the upstream error (e.g. HTTP 429, timeout, auth failure).
type BatchProgress ¶
type BatchProgress func(completed, total int)
BatchProgress is called after each batch completes during indexing. completed is the running total of documents processed so far; total is the overall number of documents to embed.
type Document ¶
type Document struct {
// contains filtered or unexported fields
}
Document represents a generic document for indexing.
func NewDocument ¶
NewDocument creates a new Document.
type Embedding ¶
type Embedding struct {
// contains filtered or unexported fields
}
Embedding represents a snippet with its pre-computed embedding vector.
func NewEmbedding ¶
NewEmbedding creates a new Embedding value object.
type EmbeddingStore ¶
type EmbeddingStore interface {
// SaveAll persists pre-computed embeddings.
SaveAll(ctx context.Context, embeddings []Embedding) error
// Find retrieves embeddings matching the given options.
Find(ctx context.Context, options ...repository.Option) ([]Embedding, error)
// Search performs vector similarity search using options.
// Embedding must be passed via WithEmbedding.
Search(ctx context.Context, options ...repository.Option) ([]Result, error)
// Exists checks whether any row matches the given options.
Exists(ctx context.Context, options ...repository.Option) (bool, error)
// DeleteBy removes documents matching the given options.
DeleteBy(ctx context.Context, options ...repository.Option) error
}
EmbeddingStore defines persistence operations for vector embeddings.
type Filters ¶
type Filters struct {
// contains filtered or unexported fields
}
Filters represents filters for snippet search.
func FiltersFrom ¶
func FiltersFrom(q repository.Query) (Filters, bool)
FiltersFrom extracts search filters from a built query.
func NewFilters ¶
func NewFilters(opts ...FiltersOption) Filters
NewFilters creates a new Filters with options.
func (Filters) CommitSHAs ¶
CommitSHAs returns the commit SHA filter.
func (Filters) CreatedAfter ¶
CreatedAfter returns the created-after filter.
func (Filters) CreatedBefore ¶
CreatedBefore returns the created-before filter.
func (Filters) EnrichmentSubtypes ¶
EnrichmentSubtypes returns the enrichment subtypes filter.
func (Filters) EnrichmentTypes ¶
EnrichmentTypes returns the enrichment types filter.
func (Filters) SourceRepos ¶
SourceRepos returns the source repository filter.
func (Filters) With ¶ added in v1.1.7
func (f Filters) With(opts ...FiltersOption) Filters
With returns a copy of f with the given options applied.
type FiltersOption ¶
type FiltersOption func(*Filters)
FiltersOption is a functional option for Filters.
func WithAuthors ¶
func WithAuthors(authors []string) FiltersOption
WithAuthors sets the author filter.
func WithCommitSHAs ¶
func WithCommitSHAs(shas []string) FiltersOption
WithCommitSHAs sets the commit SHA filter.
func WithCreatedAfter ¶
func WithCreatedAfter(t time.Time) FiltersOption
WithCreatedAfter sets the created-after filter.
func WithCreatedBefore ¶
func WithCreatedBefore(t time.Time) FiltersOption
WithCreatedBefore sets the created-before filter.
func WithEnrichmentSubtypes ¶
func WithEnrichmentSubtypes(subtypes []string) FiltersOption
WithEnrichmentSubtypes sets the enrichment subtypes filter.
func WithEnrichmentTypes ¶
func WithEnrichmentTypes(types []string) FiltersOption
WithEnrichmentTypes sets the enrichment types filter.
func WithFilePaths ¶
func WithFilePaths(paths []string) FiltersOption
WithFilePaths sets the file path filter.
func WithLanguages ¶
func WithLanguages(languages []string) FiltersOption
WithLanguages sets the language filter.
func WithSourceRepos ¶
func WithSourceRepos(repos []int64) FiltersOption
WithSourceRepos sets the source repository filter.
type Fusion ¶
type Fusion struct {
// contains filtered or unexported fields
}
Fusion combines results from multiple search methods using the Reciprocal Rank Fusion (RRF) algorithm.
func NewFusionWithK ¶
NewFusionWithK creates a Fusion with a custom RRF constant.
func (Fusion) Fuse ¶
func (f Fusion) Fuse(lists ...[]FusionRequest) []FusionResult
Fuse combines multiple ranked result lists using Reciprocal Rank Fusion. Each input list should be sorted by score (descending). Returns a fused list sorted by combined RRF score.
func (Fusion) FuseTopK ¶
func (f Fusion) FuseTopK(topK int, lists ...[]FusionRequest) []FusionResult
FuseTopK combines multiple ranked result lists and returns the top K results.
type FusionRequest ¶
type FusionRequest struct {
// contains filtered or unexported fields
}
FusionRequest represents a single input item for fusion: a result ID paired with its score.
func NewFusionRequest ¶
func NewFusionRequest(id string, score float64) FusionRequest
NewFusionRequest creates a new FusionRequest.
type FusionResult ¶
type FusionResult struct {
// contains filtered or unexported fields
}
FusionResult represents a single fused result: an ID, its score, and the original scores from each search method.
func NewFusionResult ¶
func NewFusionResult(id string, score float64, originalScores []float64) FusionResult
NewFusionResult creates a new FusionResult.
func (FusionResult) OriginalScores ¶
func (f FusionResult) OriginalScores() []float64
OriginalScores returns the original scores from each search method.
type IndexConfig ¶
type IndexConfig struct {
// contains filtered or unexported fields
}
IndexConfig holds the resolved configuration for an Index call.
func NewIndexConfig ¶
func NewIndexConfig(opts ...IndexOption) IndexConfig
NewIndexConfig applies all options and returns the resolved config.
func (IndexConfig) BatchError ¶
func (c IndexConfig) BatchError() BatchError
BatchError returns the batch error callback, or nil if none was set.
func (IndexConfig) MaxFailureRate ¶
func (c IndexConfig) MaxFailureRate() float64
MaxFailureRate returns the maximum fraction of batches that may fail before the Index call returns an error. Default is 0.05 (5%).
func (IndexConfig) Progress ¶
func (c IndexConfig) Progress() BatchProgress
Progress returns the progress callback, or nil if none was set.
type IndexOption ¶
type IndexOption func(*IndexConfig)
IndexOption configures the behaviour of an Index call.
func WithBatchError ¶
func WithBatchError(fn BatchError) IndexOption
WithBatchError registers a callback that is invoked when an individual batch fails during indexing. This allows callers to log each upstream error (HTTP status, timeout, etc.) as it occurs.
func WithMaxFailureRate ¶
func WithMaxFailureRate(rate float64) IndexOption
WithMaxFailureRate sets the maximum fraction of batches that may fail before the Index call returns an error. The rate is clamped to [0, 1]. A rate of 0 means any single batch failure is fatal.
func WithProgress ¶
func WithProgress(fn BatchProgress) IndexOption
WithProgress registers a callback that is invoked after each batch of embeddings is generated and saved.
type IndexRequest ¶
type IndexRequest struct {
// contains filtered or unexported fields
}
IndexRequest represents a generic indexing request.
func NewIndexRequest ¶
func NewIndexRequest(documents []Document) IndexRequest
NewIndexRequest creates a new IndexRequest.
func (IndexRequest) Documents ¶
func (i IndexRequest) Documents() []Document
Documents returns the documents to index.
type MultiRequest ¶
type MultiRequest struct {
// contains filtered or unexported fields
}
MultiRequest represents a multi-modal search request.
func NewMultiRequest ¶
func NewMultiRequest( topK int, textQuery, codeQuery string, keywords []string, filters Filters, ) MultiRequest
NewMultiRequest creates a new MultiRequest.
func (MultiRequest) CodeQuery ¶
func (m MultiRequest) CodeQuery() string
CodeQuery returns the code query.
func (MultiRequest) Filters ¶
func (m MultiRequest) Filters() Filters
Filters returns the search filters.
func (MultiRequest) Keywords ¶
func (m MultiRequest) Keywords() []string
Keywords returns the keywords.
func (MultiRequest) TextQuery ¶
func (m MultiRequest) TextQuery() string
TextQuery returns the text query.
func (MultiRequest) TopK ¶
func (m MultiRequest) TopK() int
TopK returns the number of results to return.
type Query ¶
type Query struct {
// contains filtered or unexported fields
}
Query represents a snippet search query.
type Result ¶
type Result struct {
// contains filtered or unexported fields
}
Result represents a generic search result.
type TokenBudget ¶
type TokenBudget struct {
// contains filtered or unexported fields
}
TokenBudget constrains embedding batches to stay within model token limits. It holds a character budget and a maximum batch size: each batch's total (truncated) text must not exceed maxChars, each batch contains at most maxBatchSize documents, and individual texts are truncated to maxChars.
func DefaultTokenBudget ¶
func DefaultTokenBudget() TokenBudget
DefaultTokenBudget returns a conservative budget of 16 000 characters (~5 300 tokens at ~3 chars/token), safe for 8 192-token models like text-embedding-3-small.
func NewTokenBudget ¶
func NewTokenBudget(maxChars int) (TokenBudget, error)
NewTokenBudget creates a TokenBudget with the given character limit. maxChars must be positive.
func (TokenBudget) Batches ¶
func (b TokenBudget) Batches(documents []Document) [][]Document
Batches partitions documents into groups whose total truncated character count stays within the budget and whose size does not exceed maxBatchSize. A single document whose truncated text still exceeds the character budget is placed alone in its own batch.
func (TokenBudget) Truncate ¶
func (b TokenBudget) Truncate(text string) string
Truncate returns text capped to the character (rune) limit.
func (TokenBudget) WithMaxBatchSize ¶
func (b TokenBudget) WithMaxBatchSize(n int) TokenBudget
WithMaxBatchSize returns a new TokenBudget with the given maximum number of documents per batch. Values <= 0 are clamped to 1.