Documentation
¶
Index ¶
- Constants
- Variables
- func DecodeFilename(keys [][]byte) string
- func DecodeTrigram(v uint32) string
- func FinalKey(filename string) []byte
- func LineChunkFunc(_ string, content []byte, yield func(Chunk) bool) error
- func MarkdownChunkFunc(_ string, content []byte, yield func(Chunk) bool) error
- func PairGet(pairs []Pair, key string) ([]byte, bool)
- func ScoreOverlap(queryTrigrams []uint32, chunkCounts map[uint32]int, _ int) float64
- func TrigramValue(a, b, c byte) uint32
- func UnmarshalTValue(data []byte) ([]uint64, error)
- func UnmarshalWValue(data []byte) ([]uint64, error)
- func ValidateAliases(aliases map[byte]byte) error
- type AppendOption
- type Bitset
- type BracketGroup
- type BracketLang
- type CRecord
- type Chunk
- type ChunkCache
- type ChunkCallback
- type ChunkFilter
- type ChunkFunc
- type ChunkResult
- type Chunker
- type DB
- func (db *DB) AddChunker(name string, c Chunker) error
- func (db *DB) AddFile(fpath, strategy string, opts ...IndexOption) (uint64, error)
- func (db *DB) AddFileWithContent(fpath, strategy string, opts ...IndexOption) (uint64, []byte, error)
- func (db *DB) AddStrategy(name, cmd string) error
- func (db *DB) AddStrategyFunc(name string, fn ChunkFunc) error
- func (db *DB) AddTmpFile(path, strategy string, content []byte, opts ...IndexOption) (uint64, error)
- func (db *DB) AppendChunks(fileid uint64, content []byte, strategy string, opts ...AppendOption) error
- func (db *DB) AppendTmpFile(path, strategy string, content []byte, opts ...AppendOption) (uint64, error)
- func (db *DB) BM25Func(queryTrigrams []uint32) (ScoreFunc, error)
- func (db *DB) CheckFile(fpath string) (FileStatus, error)
- func (db *DB) Close() error
- func (db *DB) Copy() *DB
- func (db *DB) Env() *lmdb.Env
- func (db *DB) FileIDPaths() (map[uint64]string, error)
- func (db *DB) FileInfoByID(fileid uint64) (FRecord, error)
- func (db *DB) GetChunks(fpath, targetRange string, before, after int) ([]ChunkResult, error)
- func (db *DB) HasTmp() bool
- func (db *DB) InvalidateCaches()
- func (db *DB) NewChunkCache() *ChunkCache
- func (db *DB) NewSearchCache() func()
- func (db *DB) QueryTrigramCounts(query string) ([]TrigramCount, error)
- func (db *DB) RecordCounts() (map[byte]RecordStats, error)
- func (db *DB) RefreshStale(strategy string, opts ...IndexOption) ([]FileStatus, error)
- func (db *DB) Reindex(fpath, strategy string, opts ...IndexOption) (uint64, error)
- func (db *DB) ReindexWithContent(fpath, strategy string, opts ...IndexOption) (uint64, []byte, error)
- func (db *DB) RemoveFile(fpath string) error
- func (db *DB) RemoveStrategy(name string) error
- func (db *DB) RemoveTmpFile(path string) error
- func (db *DB) ScoreFile(query, fpath string, fn ScoreFunc, opts ...SearchOption) ([]ScoredChunk, error)
- func (db *DB) Search(query string, opts ...SearchOption) (*SearchResults, error)
- func (db *DB) SearchFuzzy(query string, k int, opts ...SearchOption) (*SearchResults, error)
- func (db *DB) SearchMulti(query string, strategies map[string]ScoreFunc, k int, opts ...SearchOption) ([]MultiSearchResult, error)
- func (db *DB) SearchRegex(pattern string, opts ...SearchOption) (*SearchResults, error)
- func (db *DB) Settings() Settings
- func (db *DB) StaleFiles() ([]FileStatus, error)
- func (db *DB) TmpContent(path string) (*bytes.Reader, error)
- func (db *DB) TmpFileIDs() map[uint64]struct{}
- func (db *DB) UpdateTmpFile(path, strategy string, content []byte, opts ...IndexOption) error
- func (db *DB) Version() (string, error)
- type FRecord
- type FileChunkEntry
- type FileStatus
- type FuncChunker
- type HRecord
- type IndexOption
- type IndexStatus
- type KeyPair
- type MultiSearchResult
- type Options
- type Pair
- type RecordStats
- type ScoreFunc
- type ScoredChunk
- type SearchOption
- func WithAfter(t time.Time) SearchOption
- func WithBefore(t time.Time) SearchOption
- func WithChunkCache(cc *ChunkCache) SearchOption
- func WithChunkFilter(fn ChunkFilter) SearchOption
- func WithCoverage() SearchOption
- func WithDensity() SearchOption
- func WithExcept(ids map[uint64]struct{}) SearchOption
- func WithExceptRegex(patterns ...string) SearchOption
- func WithLoose() SearchOption
- func WithNoTmp() SearchOption
- func WithOnly(ids map[uint64]struct{}) SearchOption
- func WithOverlap() SearchOption
- func WithProximityRerank(topN int) SearchOption
- func WithRegexFilter(patterns ...string) SearchOption
- func WithScoring(fn ScoreFunc) SearchOption
- func WithTrigramFilter(fn TrigramFilter) SearchOption
- func WithVerify() SearchOption
- type SearchResult
- type SearchResults
- type Settings
- type StringDelim
- type TRecord
- type TokenEntry
- type TrigramCount
- type TrigramEntry
- type TrigramFilter
- type Trigrams
- type TxnHolder
- type WRecord
Constants ¶
const BitsetSize = 2097152 // 2^21 bytes = 2^24 bits
Variables ¶
var ErrAlreadyIndexed = errors.New("file already indexed")
ErrAlreadyIndexed is returned when AddFile is called for a path that already has F records in the database. Use Reindex or AppendChunks instead. R215
var ErrNoChunks = errors.New("chunker produced no chunks")
ErrNoChunks is returned when a chunker produces zero chunks for a file.
var LangC = BracketLang{ LineComments: []string{"//"}, BlockComments: [][2]string{{"/*", "*/"}}, StringDelims: []StringDelim{ {Open: `"`, Close: `"`, Escape: `\`}, {Open: "'", Close: "'", Escape: `\`}, }, Brackets: []BracketGroup{ {Open: []string{"{"}, Close: []string{"}"}}, {Open: []string{"("}, Close: []string{")"}}, {Open: []string{"["}, Close: []string{"]"}}, }, }
LangC is the bracket language config for C/C++.
var LangGo = BracketLang{ LineComments: []string{"//"}, BlockComments: [][2]string{{"/*", "*/"}}, StringDelims: []StringDelim{ {Open: `"`, Close: `"`, Escape: `\`}, {Open: "`", Close: "`"}, {Open: "'", Close: "'", Escape: `\`}, }, Brackets: []BracketGroup{ {Open: []string{"{"}, Close: []string{"}"}}, {Open: []string{"("}, Close: []string{")"}}, {Open: []string{"["}, Close: []string{"]"}}, }, }
LangGo is the bracket language config for Go.
var LangJS = BracketLang{ LineComments: []string{"//"}, BlockComments: [][2]string{{"/*", "*/"}}, StringDelims: []StringDelim{ {Open: `"`, Close: `"`, Escape: `\`}, {Open: "'", Close: "'", Escape: `\`}, {Open: "`", Close: "`", Escape: `\`}, }, Brackets: []BracketGroup{ {Open: []string{"{"}, Close: []string{"}"}}, {Open: []string{"("}, Close: []string{")"}}, {Open: []string{"["}, Close: []string{"]"}}, }, }
LangJS is the bracket language config for JavaScript.
var LangJava = LangC
LangJava is the bracket language config for Java.
var LangLisp = BracketLang{ LineComments: []string{";"}, StringDelims: []StringDelim{ {Open: `"`, Close: `"`, Escape: `\`}, }, Brackets: []BracketGroup{ {Open: []string{"("}, Close: []string{")"}}, {Open: []string{"["}, Close: []string{"]"}}, }, }
LangLisp is the bracket language config for Lisp/Scheme/Clojure.
var LangNginx = BracketLang{ LineComments: []string{"#"}, StringDelims: []StringDelim{ {Open: `"`, Close: `"`, Escape: `\`}, {Open: "'", Close: "'"}, }, Brackets: []BracketGroup{ {Open: []string{"{"}, Close: []string{"}"}}, }, }
LangNginx is the bracket language config for nginx.
var LangPascal = BracketLang{ BlockComments: [][2]string{{"{", "}"}, {"(*", "*)"}}, StringDelims: []StringDelim{ {Open: "'", Close: "'"}, }, Brackets: []BracketGroup{ { Open: []string{"begin", "record", "class"}, Separators: []string{}, Close: []string{"end"}, }, { Open: []string{"if"}, Separators: []string{"then", "else"}, Close: []string{"end"}, }, {Open: []string{"("}, Close: []string{")"}}, {Open: []string{"["}, Close: []string{"]"}}, }, }
LangPascal is the bracket language config for Pascal.
var LangShell = BracketLang{ LineComments: []string{"#"}, StringDelims: []StringDelim{ {Open: `"`, Close: `"`, Escape: `\`}, {Open: "'", Close: "'"}, }, Brackets: []BracketGroup{ { Open: []string{"if"}, Separators: []string{"then", "elif", "else"}, Close: []string{"fi"}, }, { Open: []string{"while", "for"}, Separators: []string{"do"}, Close: []string{"done"}, }, { Open: []string{"case"}, Close: []string{"esac"}, }, {Open: []string{"{"}, Close: []string{"}"}}, {Open: []string{"("}, Close: []string{")"}}, }, }
LangShell is the bracket language config for Bourne shell / bash.
Functions ¶
func DecodeFilename ¶
DecodeFilename reconstructs a filename from chained N record keys. Keys must be in order (part 0, 1, ..., 255).
func DecodeTrigram ¶
DecodeTrigram converts a 24-bit trigram value back to a 3-byte string. Bytes that are 0 (whitespace-encoded) are shown as spaces.
func LineChunkFunc ¶
LineChunkFunc is a built-in ChunkFunc that yields one chunk per line. Range is "N-N" (1-based line number).
func MarkdownChunkFunc ¶
MarkdownChunkFunc splits markdown content into paragraph-based chunks. Heading lines start new chunks; a heading and its following paragraph (up to the next blank line or heading) form one chunk. Blank lines are boundaries only and are not included in any chunk's content. Fenced code blocks (``` or ~~~) suppress blank-line splitting — all lines from the opening fence through the matching close belong to the current chunk. R465, R466, R467, R468
func ScoreOverlap ¶
ScoreOverlap scores a chunk by the count of matching query trigrams, with no normalization (OR semantics).
func TrigramValue ¶
TrigramValue computes the 24-bit trigram from three byte values.
func UnmarshalTValue ¶
UnmarshalTValue decodes a TRecord value. Trigram must be set separately (from key).
func UnmarshalWValue ¶
UnmarshalWValue decodes a WRecord value. Same format as TRecord.
func ValidateAliases ¶
ValidateAliases returns an error if any alias source or target byte is non-ASCII (≥ 0x80). Aliasing UTF-8 continuation or leading bytes would corrupt multibyte characters and break character-internal trigram skipping.
Types ¶
type AppendOption ¶
type AppendOption func(*appendConfig)
AppendOption configures the append methods (AppendChunks, AppendTmpFile). R158
func WithAppendChunkCallback ¶
func WithAppendChunkCallback(fn ChunkCallback) AppendOption
WithAppendChunkCallback supplies a chunk callback for append methods. CRC: crc-DB.md | R471
func WithBaseLine ¶
func WithBaseLine(n int) AppendOption
WithBaseLine sets the 1-based line number offset for line-based chunker ranges. When non-zero, "start-end" ranges are adjusted by adding this offset. R162
func WithContentHash ¶
func WithContentHash(hash string) AppendOption
WithContentHash sets the full-file SHA-256 hash (caller pre-computed). R159
func WithFileLength ¶
func WithFileLength(n int64) AppendOption
WithFileLength sets the full file size after append. R161
func WithModTime ¶
func WithModTime(t int64) AppendOption
WithModTime sets the file modification time (Unix nanoseconds). R160
type Bitset ¶
type Bitset [BitsetSize]byte
Bitset is a fixed-size bitset for 16,777,216 trigrams (2^24).
type BracketGroup ¶
type BracketGroup struct {
Open []string // openers: e.g. ["{"], ["if","while","for"]
Separators []string // optional: e.g. ["else","elif","then"]
Close []string // closers: e.g. ["}"], ["end","done","fi"]
}
BracketGroup defines one set of matching brackets. R309 Separators are mid-group markers (e.g. "else" between "if"/"end").
type BracketLang ¶
type BracketLang struct {
LineComments []string // e.g. "//", "#", "--"
BlockComments [][2]string // e.g. {{"/*", "*/"}, {"<!--", "-->"}}
StringDelims []StringDelim // e.g. {`"`, `"`, `\`}
Brackets []BracketGroup // open/separator/close sets
}
BracketLang defines the lexical rules for one language. R307
func LangByName ¶
func LangByName(name string) (BracketLang, bool)
LangByName returns a BracketLang config by name, or false if not found.
type CRecord ¶
type CRecord struct {
ChunkID uint64
Hash [32]byte
Trigrams []TrigramEntry
Tokens []TokenEntry
Attrs []Pair
FileIDs []uint64
// contains filtered or unexported fields
}
CRecord is the per-chunk record. Self-describing: everything needed for search, scoring, filtering, and removal. Carries unexported db/txn — the chunk is tied to the transaction that read it.
func UnmarshalCValue ¶
UnmarshalCValue decodes a CRecord value. ChunkID must be set separately (from key). v2 format: hash + trigrams + tokens + attrs + fileids
func (*CRecord) FileRecord ¶
FileRecord navigates to an F record within the same transaction.
func (*CRecord) MarshalValue ¶
MarshalValue encodes the CRecord value (everything except the key prefix and chunkid). v2 format: hash + trigrams + tokens + attrs + fileids
type Chunk ¶
Chunk is a single chunk yielded by a Chunker. Range is an opaque label (e.g. "1-10" for lines); Content is the chunk text. Range and Content may be reused between yields — caller must copy if retaining. Attrs is optional per-chunk metadata (nil means no attrs).
type ChunkCache ¶
type ChunkCache struct {
// contains filtered or unexported fields
}
ChunkCache is a per-query cache for file content and chunked data. Avoids redundant file reads and re-chunking when processing search results.
func (*ChunkCache) ChunkText ¶
func (cc *ChunkCache) ChunkText(fpath, rangeLabel string) ([]byte, bool)
ChunkText returns a single chunk's content by range label. Uses lazy chunking — stops as soon as the target is found.
func (*ChunkCache) GetChunks ¶
func (cc *ChunkCache) GetChunks(fpath, targetRange string, before, after int) ([]ChunkResult, error)
GetChunks retrieves the target chunk and up to before/after positional neighbors. Same contract as DB.GetChunks but cached.
type ChunkCallback ¶
type ChunkCallback func(chunkText string)
ChunkCallback receives clean chunk text during indexing. Called once per chunk, in chunk order. The string is a copy, safe to retain. CRC: crc-DB.md | R469
type ChunkFilter ¶
ChunkFilter receives a CRecord during candidate evaluation. Return true to keep the chunk, false to reject it. The CRecord carries transaction context — use Txn() and DB() for lookups.
type ChunkFunc ¶
ChunkFunc is a generator that yields chunks for a file. Convenience type — wrap with FuncChunker to get a full Chunker.
func RunChunkerFunc ¶
RunChunkerFunc returns a ChunkFunc that executes an external command. The command receives the filepath as an argument and outputs one chunk per line on stdout as "range\tcontent".
type ChunkResult ¶
type ChunkResult struct {
Path string `json:"path"`
Range string `json:"range"`
Content string `json:"content"`
Index int `json:"index"` // 0-based position in the file's chunk list
Attrs []Pair `json:"attrs,omitempty"`
}
ChunkResult holds a single chunk with its content and position. R201
type Chunker ¶
type Chunker interface {
Chunks(path string, content []byte, yield func(Chunk) bool) error
ChunkText(path string, content []byte, rangeLabel string) ([]byte, bool)
}
Chunker is the interface for chunking strategies. Chunks produces chunks for indexing; ChunkText retrieves a single chunk's content.
func BracketChunker ¶
func BracketChunker(lang BracketLang) Chunker
BracketChunker returns a Chunker for the given language config. R320
func IndentChunker ¶
func IndentChunker(lang BracketLang, tabWidth int) Chunker
IndentChunker returns a Chunker for indentation-scoped languages. R333 tabWidth controls how tabs count for column calculation (0 = one column per tab).
type DB ¶
type DB struct {
// contains filtered or unexported fields
}
func (*DB) AddChunker ¶
CRC: crc-DB.md | R293
func (*DB) AddFile ¶
func (db *DB) AddFile(fpath, strategy string, opts ...IndexOption) (uint64, error)
Seq: seq-add.md | R477
func (*DB) AddFileWithContent ¶
func (db *DB) AddFileWithContent(fpath, strategy string, opts ...IndexOption) (uint64, []byte, error)
CRC: crc-DB.md | R120, R478
func (*DB) AddStrategy ¶
func (*DB) AddStrategyFunc ¶
CRC: crc-DB.md | R294
func (*DB) AddTmpFile ¶
func (db *DB) AddTmpFile(path, strategy string, content []byte, opts ...IndexOption) (uint64, error)
AddTmpFile indexes a tmp:// document in the in-memory overlay. CRC: crc-DB.md | Seq: seq-tmp-add.md | R358, R359, R360, R480
func (*DB) AppendChunks ¶
func (db *DB) AppendChunks(fileid uint64, content []byte, strategy string, opts ...AppendOption) error
AppendChunks adds chunks to an existing file without full reindex. content is only the appended bytes, not the full file. CRC: crc-DB.md | Seq: seq-append.md R150, R151, R152, R153, R154, R155, R156, R157, R163, R164, R165, R166, R167, R168
func (*DB) AppendTmpFile ¶
func (db *DB) AppendTmpFile(path, strategy string, content []byte, opts ...AppendOption) (uint64, error)
CRC: crc-DB.md | R428-R442, R483 AppendTmpFile appends content to an existing tmp:// document, creating it if it doesn't exist. New chunks are indexed from the appended content without touching existing chunks.
func (*DB) BM25Func ¶
CRC: crc-DB.md | R274, R277, R278 BM25Func reads T records for per-trigram document frequencies and I record counters for corpus statistics, then returns a BM25 ScoreFunc closure.
func (*DB) CheckFile ¶
func (db *DB) CheckFile(fpath string) (FileStatus, error)
CheckFile checks whether an indexed file is fresh, stale, or missing on disk.
func (*DB) Copy ¶
CRC: crc-DB.md | R459, R460, R461, R462 Copy returns a shallow copy of the DB sharing the LMDB env, overlay, and chunker registry. Caches are nil — the copy lazy-loads from committed LMDB state. Intended for short-lived write transactions in a separate goroutine.
func (*DB) FileIDPaths ¶
CRC: crc-DB.md | R448, R449, R450, R454
func (*DB) FileInfoByID ¶
FileInfoByID resolves a fileid to its FRecord.
func (*DB) GetChunks ¶
func (db *DB) GetChunks(fpath, targetRange string, before, after int) ([]ChunkResult, error)
Seq: seq-chunks.md | R197, R198, R199, R200, R201, R202, R203 GetChunks retrieves the target chunk (identified by range label) and up to before/after positional neighbors. Returns chunks in positional order.
func (*DB) HasTmp ¶
CRC: crc-DB.md | R377 HasTmp reports whether any tmp:// documents exist in the overlay.
func (*DB) InvalidateCaches ¶
func (db *DB) InvalidateCaches()
CRC: crc-DB.md | R463, R464 InvalidateCaches nils the path and FRecord caches, forcing lazy reload on next access. Does not reset overlayOnce.
func (*DB) NewChunkCache ¶
func (db *DB) NewChunkCache() *ChunkCache
NewChunkCache creates a per-query chunk cache.
func (*DB) QueryTrigramCounts ¶
func (db *DB) QueryTrigramCounts(query string) ([]TrigramCount, error)
QueryTrigramCounts extracts trigrams from a query string and returns their corpus document frequencies. For diagnostic/inspection use.
func (*DB) RecordCounts ¶
func (db *DB) RecordCounts() (map[byte]RecordStats, error)
CRC: crc-DB.md | R443, R444, R445
func (*DB) RefreshStale ¶
func (db *DB) RefreshStale(strategy string, opts ...IndexOption) ([]FileStatus, error)
RefreshStale reindexes all stale files. If strategy is empty, each file's existing strategy is used. Returns the list of stale/missing files. CRC: crc-DB.md | R479
func (*DB) Reindex ¶
func (db *DB) Reindex(fpath, strategy string, opts ...IndexOption) (uint64, error)
func (*DB) ReindexWithContent ¶
func (db *DB) ReindexWithContent(fpath, strategy string, opts ...IndexOption) (uint64, []byte, error)
CRC: crc-DB.md | R121
func (*DB) RemoveFile ¶
func (*DB) RemoveStrategy ¶
func (*DB) RemoveTmpFile ¶
CRC: crc-DB.md | Seq: seq-tmp-add.md | R364, R365 RemoveTmpFile removes a tmp:// document from the overlay.
func (*DB) ScoreFile ¶
func (db *DB) ScoreFile(query, fpath string, fn ScoreFunc, opts ...SearchOption) ([]ScoredChunk, error)
Seq: seq-score.md | R178, R179, R180 ScoreFile returns per-chunk scores for a single file using the given scoring function.
func (*DB) Search ¶
func (db *DB) Search(query string, opts ...SearchOption) (*SearchResults, error)
Seq: seq-search.md | R178, R179, R180, R181, R182
func (*DB) SearchFuzzy ¶
func (db *DB) SearchFuzzy(query string, k int, opts ...SearchOption) (*SearchResults, error)
CRC: crc-DB.md | Seq: seq-fuzzy-trigram.md | R418, R419, R420, R421, R422, R423, R425, R427 SearchFuzzy performs fast typo-tolerant search in two phases. Phase 1: trigram OR-union tally from T record posting lists (select top-k). Phase 2: C record re-score with ScoreCoverage for the top-k winners.
func (*DB) SearchMulti ¶
func (db *DB) SearchMulti(query string, strategies map[string]ScoreFunc, k int, opts ...SearchOption) ([]MultiSearchResult, error)
CRC: crc-DB.md | Seq: seq-search-multi.md | R283, R284, R285, R287, R288, R289, R290
func (*DB) SearchRegex ¶
func (db *DB) SearchRegex(pattern string, opts ...SearchOption) (*SearchResults, error)
Seq: seq-search.md SearchRegex searches using a regex pattern against the full trigram index.
func (*DB) StaleFiles ¶
func (db *DB) StaleFiles() ([]FileStatus, error)
StaleFiles returns the status of every indexed file.
func (*DB) TmpContent ¶
CRC: crc-DB.md | R378 TmpContent returns a reader over the raw stored content of a tmp:// document.
func (*DB) TmpFileIDs ¶
CRC: crc-DB.md | R369 TmpFileIDs returns the set of all current tmp:// fileids.
func (*DB) UpdateTmpFile ¶
func (db *DB) UpdateTmpFile(path, strategy string, content []byte, opts ...IndexOption) error
CRC: crc-DB.md | Seq: seq-tmp-add.md | R361, R362, R363, R481 UpdateTmpFile replaces the content of an existing tmp:// document.
type FRecord ¶
type FRecord struct {
FileID uint64
ModTime int64
ContentHash [32]byte
FileLength int64
Strategy string
Names []string
Chunks []FileChunkEntry
Tokens []TokenEntry
}
FRecord is the per-file record. Metadata, ordered chunks, file-level token bag.
func UnmarshalFHeader ¶
R451, R452: UnmarshalFHeader decodes only the header fields of an F record value: ModTime, ContentHash, FileLength, Strategy, and Names. Skips Chunks and Tokens.
func UnmarshalFValue ¶
UnmarshalFValue decodes an FRecord value. FileID must be set separately (from key).
func (*FRecord) MarshalValue ¶
MarshalValue encodes the FRecord value (everything except the key prefix and fileid).
type FileChunkEntry ¶
FileChunkEntry pairs a chunkid with its location label (opaque range string from chunker).
type FileStatus ¶
type FileStatus struct {
Path string
Status string // "fresh", "stale", "missing"
FileID uint64
Strategy string
}
FileStatus is returned by CheckFile, StaleFiles, and RefreshStale.
type FuncChunker ¶
type FuncChunker struct {
Fn ChunkFunc
}
FuncChunker wraps a bare ChunkFunc into a Chunker. ChunkText re-runs the function and returns the first chunk matching the range label.
type IndexOption ¶
type IndexOption func(*indexConfig)
IndexOption configures indexing methods (AddFile, AddFileWithContent, Reindex, ReindexWithContent, RefreshStale, AddTmpFile, UpdateTmpFile). CRC: crc-DB.md | R472
func WithChunkCallback ¶
func WithChunkCallback(fn ChunkCallback) IndexOption
WithChunkCallback supplies a chunk callback for indexing methods. CRC: crc-DB.md | R470
type IndexStatus ¶
type IndexStatus struct {
Built bool
}
IndexStatus reports the state of the index.
type KeyPair ¶
type KeyPair struct {
Key []byte
Value []byte // nil for non-final parts; caller sets fileid on final part
}
KeyPair is an N record key/value pair for filename key chains.
func EncodeFilename ¶
EncodeFilename returns N record key/value pairs for a filename. Short filenames (≤509 bytes) produce a single final key. Longer filenames are split across chained keys.
type MultiSearchResult ¶
type MultiSearchResult struct {
Strategy string
Results []SearchResult
}
CRC: crc-DB.md | R286 MultiSearchResult holds one strategy's results from SearchMulti.
type Options ¶
type Options struct {
CaseInsensitive bool
Aliases map[byte]byte // maps input bytes to replacement bytes before trigram extraction
DBName string // subdatabase name, default "fts"
MaxDBs int // LMDB max named databases, default 2
MapSize int64 // bytes, default 1GB
}
Options configures database creation and opening.
type Pair ¶
Pair is an opaque key-value pair for per-chunk metadata. Allows duplicate keys. Mirrors the DB wire format.
type RecordStats ¶
CRC: crc-DB.md | R445
type ScoreFunc ¶
type ScoreFunc func(queryTrigrams []uint32, chunkCounts map[uint32]int, chunkTokenCount int) float64
ScoreFunc computes a relevance score for a chunk. queryTrigrams: active query trigrams. chunkCounts: trigram -> occurrence count in the chunk. chunkTokenCount: number of tokens (words) in the chunk.
var ScoreCoverage ScoreFunc = scoreCoverage
ScoreCoverage is the coverage scoring function: fraction of active query trigrams present in chunk.
var ScoreDensityFunc ScoreFunc = scoreDensity
ScoreDensityFunc is the density scoring function for direct use with ScoreFile.
type ScoredChunk ¶
ScoredChunk is a per-chunk trigram match score from ScoreFile.
type SearchOption ¶
type SearchOption func(*searchConfig)
SearchOption configures search behavior.
func WithAfter ¶
func WithAfter(t time.Time) SearchOption
WithAfter keeps chunks with timestamp >= t. Checks "timestamp" attr first (parsed as Unix nanos); falls back to file mod time from F record. CRC: crc-DB.md | R258
func WithBefore ¶
func WithBefore(t time.Time) SearchOption
WithBefore keeps chunks with timestamp < t. Same fallback as WithAfter. CRC: crc-DB.md | R259
func WithChunkCache ¶
func WithChunkCache(cc *ChunkCache) SearchOption
WithChunkCache threads an external ChunkCache through post-filters (verify, regex, except-regex). When present, post-filters use the cache instead of re-reading files. R486
func WithChunkFilter ¶
func WithChunkFilter(fn ChunkFilter) SearchOption
WithChunkFilter adds a chunk filter. Multiple calls accumulate (AND semantics).
func WithCoverage ¶
func WithCoverage() SearchOption
WithCoverage uses coverage scoring (default): matching / total active query trigrams.
func WithDensity ¶
func WithDensity() SearchOption
WithDensity uses token-density scoring for long queries.
func WithExcept ¶
func WithExcept(ids map[uint64]struct{}) SearchOption
WithExcept excludes chunks from the given file IDs.
func WithExceptRegex ¶
func WithExceptRegex(patterns ...string) SearchOption
WithExceptRegex adds subtract post-filters: any match rejects the chunk. Multiple calls accumulate patterns. R184, R185
func WithLoose ¶
func WithLoose() SearchOption
CRC: crc-DB.md | Seq: seq-fuzzy-search.md | R336 WithLoose enables OR semantics at the term level: a chunk matches if it contains any query term's trigrams. Default scoring: terms matched / total terms.
func WithNoTmp ¶
func WithNoTmp() SearchOption
CRC: crc-DB.md | R376 WithNoTmp excludes tmp:// overlay documents from search results.
func WithOnly ¶
func WithOnly(ids map[uint64]struct{}) SearchOption
WithOnly restricts search to chunks from the given file IDs.
func WithOverlap ¶
func WithOverlap() SearchOption
CRC: crc-DB.md | R271 WithOverlap uses overlap scoring: matching trigram count, no normalization.
func WithProximityRerank ¶
func WithProximityRerank(topN int) SearchOption
CRC: crc-DB.md | R279 WithProximityRerank reranks the top-N results by query term proximity in chunk text.
func WithRegexFilter ¶
func WithRegexFilter(patterns ...string) SearchOption
WithRegexFilter adds AND post-filters: every pattern must match chunk content. Multiple calls accumulate patterns. R183, R185
func WithScoring ¶
func WithScoring(fn ScoreFunc) SearchOption
WithScoring uses a custom scoring function.
func WithTrigramFilter ¶
func WithTrigramFilter(fn TrigramFilter) SearchOption
WithTrigramFilter supplies a caller-defined trigram selection function.
func WithVerify ¶
func WithVerify() SearchOption
WithVerify enables post-filter verification: after trigram intersection, it reads chunk text from disk and verifies that each query term appears as a case-insensitive substring. This eliminates trigram false positives. R124, R125
type SearchResult ¶
type SearchResult struct {
Path string
Range string
Score float64
// contains filtered or unexported fields
}
SearchResult is a single match from Search. R99, R490, R491
type SearchResults ¶
type SearchResults struct {
Results []SearchResult
Status IndexStatus
}
SearchResults wraps search matches with index health status.
type Settings ¶
type Settings struct {
CaseInsensitive bool
Aliases map[byte]byte // byte→byte alias mapping
ChunkingStrategies map[string]string // name→cmd (empty cmd = func strategy)
}
Settings holds the in-memory representation of I records.
type StringDelim ¶
type StringDelim struct {
Open string // opening delimiter
Close string // closing delimiter (same as Open for symmetric quotes)
Escape string // escape character (empty = no escaping)
}
StringDelim defines a string delimiter and its escape character. R308
type TRecord ¶
TRecord is the trigram inverted index entry.
func (*TRecord) MarshalValue ¶
MarshalValue encodes the TRecord value (packed chunkid list).
type TokenEntry ¶
TokenEntry pairs a token string with its occurrence count.
type TrigramCount ¶
TrigramCount pairs a trigram code with its corpus document frequency.
func FilterAll ¶
func FilterAll(trigrams []TrigramCount, _ int) []TrigramCount
FilterAll uses every query trigram. No filtering.
type TrigramEntry ¶
TrigramEntry pairs a trigram code with its per-chunk occurrence count.
type TrigramFilter ¶
type TrigramFilter func(trigrams []TrigramCount, totalChunks int) []TrigramCount
TrigramFilter decides which trigrams to use for a given query. It receives the query's trigrams with their corpus-wide document frequencies, and the total number of indexed chunks. It returns the subset to search with.
func FilterBestN ¶
func FilterBestN(n int) TrigramFilter
FilterBestN returns a TrigramFilter that keeps the N trigrams with the lowest document frequency.
func FilterByRatio ¶
func FilterByRatio(maxRatio float64) TrigramFilter
FilterByRatio returns a TrigramFilter that skips trigrams appearing in more than maxRatio of total chunks. E.g., 0.50 skips trigrams in >50% of chunks.
type Trigrams ¶
type Trigrams struct {
// contains filtered or unexported fields
}
Trigrams extracts raw byte trigrams from text. Every byte is its own value — no character set mapping. Whitespace bytes are boundaries; runs of whitespace collapse. Case insensitivity is applied via bytes.ToLower(). Byte aliases are applied before extraction.
func NewTrigrams ¶
NewTrigrams creates a trigram extractor.
func (*Trigrams) EncodeTrigram ¶
EncodeTrigram converts a 3-byte string to a 24-bit trigram using the same encoding as ExtractTrigrams: case folding, aliases, whitespace→0. Returns 0, false if the trigram cannot appear in the index (e.g. all whitespace, or consecutive whitespace which encode() collapses away).
func (*Trigrams) ExtractTrigrams ¶
ExtractTrigrams extracts all trigrams from data. Character-internal trigrams (windows entirely within a multibyte UTF-8 char) are skipped.