Documentation
¶
Index ¶
- Constants
- func BuildCAGRACachePath(namespace string, vectorDim, graphDegree, count int) string
- func CosineSimilarity(a, b []float32) float32
- func CosineSimilarityFloat64(a, b []float64) float64
- func CosineSimilarityInt8(a, b []uint8) float32
- func CosineSimilarityInt8Fallback(a, b []uint8) float32
- func DebugLoggingEnabled() bool
- func Debugf(format string, args ...interface{})
- func Debugln(args ...interface{})
- func DisableDebugLogging()
- func EnableDebugLogging()
- func ExampleVectorSearch()
- func FastQuantize(input []float32) ([]int8, float32)
- func FusedCAGRAAvailable() bool
- func GetCUDADeviceCount() int
- func GetCUDAVersion() string
- func GetEmbedBuffer() []float32
- func GetInt8Buffer() []int8
- func GetOptimalBatchSize() int
- func GetOptimalGPUBatchSize() int
- func GetSearchConfig(preset SearchPreset, estimatedSize int) search.Config
- func GetTokenBuffer() []int
- func IsCUDAAvailable() bool
- func LoadModelUnified(config *UnifiedModelConfig) (interface{}, error)
- func PutEmbedBuffer(buf []float32)
- func PutInt8Buffer(buf []int8)
- func PutTokenBuffer(buf []int)
- func SetDebugOutput(w io.Writer)
- func SetSimpleInt8Verbose(verbose bool)
- func ZeroCopyInt32ToFloat32(src []int32) []float32
- type BatchConfig
- type BatchEmbeddingResult
- type BatchProcessor
- type BatchResult
- type BufferPool
- type CAGRAConfig
- type CPUBulkIndexer
- type CPUBulkIndexerStats
- type CachedEmbedding
- type Document
- type EmbedInt8Result
- type EmbeddingModel
- func (m *EmbeddingModel) EmbedInt8(text string) (*EmbedInt8Result, error)
- func (m *EmbeddingModel) Encode(text string) ([]float32, error)
- func (m *EmbeddingModel) FindMostSimilar(query string, candidates []string, limit int) ([]SimilarityResult, error)
- func (m *EmbeddingModel) GetAvailableTexts() []string
- func (m *EmbeddingModel) OptimizedEmbedding(text string, cache *TokenPatternCache) ([]float32, error)
- func (m *EmbeddingModel) Similarity(text1, text2 string) (float32, error)
- type EmbeddingModelInt8
- type FusedCAGRAConfig
- type FusedCAGRAEngine
- func (engine *FusedCAGRAEngine) BuildIndex(embedWeights []int8, embedScales []float32, database []simd.Vec512, ...) error
- func (engine *FusedCAGRAEngine) Close()
- func (engine *FusedCAGRAEngine) GetStats() FusedCAGRAStats
- func (engine *FusedCAGRAEngine) Search(tokens []uint16) ([]SearchResult, error)
- func (engine *FusedCAGRAEngine) SearchBatch(tokenBatch [][]uint16, maxTokens int) ([][]SearchResult, error)
- type FusedCAGRAStats
- type GPUBatchProcessor
- type GPUBlockPool
- type GPUCagraConfig
- type GPUEmbeddingModel
- type GPUIndexer
- func (g *GPUIndexer) AddVectors(vectors [][]int8) error
- func (g *GPUIndexer) BatchSearch(queries [][]int8, k int) ([][]SearchResult, error)
- func (g *GPUIndexer) Close() error
- func (g *GPUIndexer) GetMemoryUsage() uint64
- func (g *GPUIndexer) GetStats() IndexStats
- func (g *GPUIndexer) IndexVectors(vectors []simd.Vec512, scales []float32) error
- func (g *GPUIndexer) Initialize() error
- func (g *GPUIndexer) IsReady() bool
- func (g *GPUIndexer) Search(query simd.Vec512, scale float32, k int) ([]int, []float32, error)
- func (g *GPUIndexer) TrainIndex(vectors [][]int8) error
- type GPUMemoryConfig
- type GPUMemoryManager
- func (m *GPUMemoryManager) AllocateQueryMemory() (unsafe.Pointer, error)
- func (m *GPUMemoryManager) AllocateResultMemory() (unsafe.Pointer, error)
- func (m *GPUMemoryManager) AllocateVectorMemory() (unsafe.Pointer, error)
- func (m *GPUMemoryManager) Close() error
- func (m *GPUMemoryManager) ForceGarbageCollection()
- func (m *GPUMemoryManager) FreeQueryMemory(ptr unsafe.Pointer)
- func (m *GPUMemoryManager) FreeResultMemory(ptr unsafe.Pointer)
- func (m *GPUMemoryManager) FreeVectorMemory(ptr unsafe.Pointer)
- func (m *GPUMemoryManager) GetMemoryStats() GPUMemoryStats
- func (m *GPUMemoryManager) StartMemoryMonitor(interval time.Duration)
- type GPUMemoryStats
- type GPUSearchServer
- type GPUServerConfig
- type GPUStats
- type IndexComparison
- type IndexConfig
- type IndexData
- type IndexProgress
- type IndexRequest
- type IndexResponse
- type IndexSnapshot
- type IndexStats
- type IndexingStats
- type Int8EmbeddingModel512
- func (m *Int8EmbeddingModel512) Embed(text string) ([]float32, error)
- func (m *Int8EmbeddingModel512) EmbedInt8(text string) (*Int8Result512, error)
- func (m *Int8EmbeddingModel512) EmbedTokens(tokens []int16) ([]float32, error)
- func (m *Int8EmbeddingModel512) GetInt8Weights() ([]int8, []float32)
- func (m *Int8EmbeddingModel512) GetMemoryUsage() string
- func (m *Int8EmbeddingModel512) Similarity(text1, text2 string) (float32, error)
- func (m *Int8EmbeddingModel512) Tokenize(text string) ([]int16, error)
- type Int8Result512
- type MemoryOptimizedCache
- type ModelCompatibilityWrapper
- type ObjectPool
- type OptimizedEmbeddingModel
- func (m *OptimizedEmbeddingModel) BatchEmbed(texts []string) ([]*EmbedInt8Result, error)
- func (m *OptimizedEmbeddingModel) EmbedInt8Optimized(text string) (*EmbedInt8Result, error)
- func (m *OptimizedEmbeddingModel) EmbedOptimized(text string) ([]float32, error)
- func (m *OptimizedEmbeddingModel) FastSearch(query string, limit int) ([]float32, error)
- func (m *OptimizedEmbeddingModel) GetStats() map[string]interface{}
- func (m *OptimizedEmbeddingModel) OptimizeForProduction(maxCacheSize int, gpuEnabled bool)
- func (m *OptimizedEmbeddingModel) PrecomputePatterns(patterns []string)
- func (m *OptimizedEmbeddingModel) WarmupCache()
- type ParallelIndexConfig
- type ParallelIndexStats
- type ParallelIndexer
- type ParallelProcessor
- type ParallelSearchEngine
- type PersistenceFormat
- type PersistenceStats
- type PoolStats
- type PrecomputedEmbeddings
- type PresetConfig
- type SaveOptions
- type SearchConfig
- type SearchEngine
- func FastSearchEngine(model *EmbeddingModel) *SearchEngine
- func NewAsyncSearchEngine(model *EmbeddingModel) *SearchEngine
- func NewAutoSearchEngine(model *EmbeddingModel) *SearchEngine
- func NewCAGRASearchEngine(model *EmbeddingModel) *SearchEngine
- func NewGPUSearchEngine(model *EmbeddingModel) *SearchEngine
- func NewSearchEngine(model *EmbeddingModel) *SearchEngine
- func NewSearchEngineWithConfig(model *EmbeddingModel, config SearchConfig) *SearchEngine
- func NewSearchEngineWithPreset(model *EmbeddingModel, preset SearchPreset) (*SearchEngine, error)
- func (se *SearchEngine) AutoSave(dir string, interval time.Duration)
- func (se *SearchEngine) Checkpoint(dir string) error
- func (se *SearchEngine) Clear()
- func (se *SearchEngine) Close() error
- func (se *SearchEngine) FindSimilar(documentID int, k int) ([]SearchResult, error)
- func (se *SearchEngine) Flush() error
- func (se *SearchEngine) GetAllDocuments() map[int]string
- func (se *SearchEngine) GetDocument(id int) (string, bool)
- func (se *SearchEngine) Index(text string) (int, error)
- func (se *SearchEngine) IndexBatch(texts []string) ([]int, error)
- func (se *SearchEngine) IndexBatchAsync(texts []string) <-chan IndexResponse
- func (se *SearchEngine) IndexBatchAsyncWithIDs(ids []int, texts []string) <-chan IndexResponse
- func (se *SearchEngine) IndexBatchWithIDs(ids []int, texts []string) error
- func (se *SearchEngine) IndexWithID(id int, text string) error
- func (se *SearchEngine) Load(path string) error
- func (se *SearchEngine) LoadFromDirectory(dir string) error
- func (se *SearchEngine) Optimize() error
- func (se *SearchEngine) QuickSave(path string) error
- func (se *SearchEngine) Save(path string, options SaveOptions) error
- func (se *SearchEngine) SaveToDirectory(dir string, options SaveOptions) error
- func (se *SearchEngine) Search(query string, k int) ([]SearchResult, error)
- func (se *SearchEngine) SearchWithOptions(query string, opts SearchOptions) ([]SearchResult, error)
- func (se *SearchEngine) Size() int
- func (se *SearchEngine) Stats() SearchEngineStats
- type SearchEngineStats
- type SearchOptions
- type SearchPreset
- type SearchRequest
- type SearchResponse
- type SearchResult
- type SearchServer
- type ServerConfig
- type ServerDocument
- type ServerIndexRequest
- type ServerIndexResponse
- type SharedIndexHeader
- type SharedIndexStats
- type SharedMemoryConfig
- type SharedMemoryIndex
- func (idx *SharedMemoryIndex) AddVector(vec *simd.Vec512, scale float32, id int) error
- func (idx *SharedMemoryIndex) BatchSearch(queries []*simd.Vec512, k int) [][]SearchResult
- func (idx *SharedMemoryIndex) Close() error
- func (idx *SharedMemoryIndex) GetVector(index int) (*simd.Vec512, error)
- func (idx *SharedMemoryIndex) ReleaseWriter()
- func (idx *SharedMemoryIndex) SearchTopK(query *simd.Vec512, k int) []SearchResult
- func (idx *SharedMemoryIndex) Stats() SharedIndexStats
- func (idx *SharedMemoryIndex) Sync() error
- func (idx *SharedMemoryIndex) TryAcquireWriter() bool
- func (idx *SharedMemoryIndex) WaitForWrites(targetSeq uint64)
- type SimilarityResult
- type SimpleInt8Model512
- func (m *SimpleInt8Model512) Close() error
- func (m *SimpleInt8Model512) Embed(text string) ([]float32, error)
- func (m *SimpleInt8Model512) EmbedBatchInt8(texts []string) ([]*Int8Result512, error)
- func (m *SimpleInt8Model512) EmbedBatchInt8Optimized(texts []string, progressCallback func(processed, total int)) ([]*Int8Result512, error)
- func (m *SimpleInt8Model512) EmbedDim() int
- func (m *SimpleInt8Model512) EmbedFast(text string) ([]float32, func())
- func (m *SimpleInt8Model512) EmbedInt8(text string) (*Int8Result512, error)
- func (m *SimpleInt8Model512) EmbedTokens(tokens []int16) ([]float32, error)
- func (m *SimpleInt8Model512) EmbedTokensInt8(tokens []int16) (*Int8Result512, error)
- func (m *SimpleInt8Model512) EmbedTokensInto(tokens []int16, result []float32) int
- func (m *SimpleInt8Model512) EmbedTokensIntoSIMD(tokens []int16, result []float32) int
- func (m *SimpleInt8Model512) EmbeddingTable() [][]int8
- func (m *SimpleInt8Model512) ScaleTable() []float32
- func (m *SimpleInt8Model512) Similarity(text1, text2 string) (float32, error)
- func (m *SimpleInt8Model512) SimpleTokenize(text string) []int16
- func (m *SimpleInt8Model512) VocabSize() int
- type SimplifiedSearchConfig
- type TensorInfo
- type TokenData
- type TokenFrequencyData
- type TokenFrequencySection
- type TokenPatternCache
- func (c *TokenPatternCache) BatchGetEmbeddings(tokenBatches [][]int) ([]*CachedEmbedding, []bool)
- func (c *TokenPatternCache) ComputeEmbeddingWithCache(tokens []int, computeFn func([]int) ([]float32, error)) ([]float32, error)
- func (c *TokenPatternCache) FilterStopwords(tokens []int, textLength int) []int
- func (c *TokenPatternCache) GetCachedEmbedding(tokens []int) (*CachedEmbedding, bool)
- func (c *TokenPatternCache) GetStats() map[string]interface{}
- func (c *TokenPatternCache) PrecomputeCommonPatterns(model *EmbeddingModel, patterns [][]int)
- type TokenPatternSection
- type TokenizerOptimizations
- type UnifiedModelConfig
- type VectorBuffer
- type VectorIndex
- func (idx *VectorIndex) AddDocument(doc Document) error
- func (idx *VectorIndex) AddDocuments(docs []Document) error
- func (idx *VectorIndex) AddDocumentsBulkGPU(docs []Document) error
- func (idx *VectorIndex) AddDocumentsWithMonitoring(docs []Document) (<-chan interface{}, error)
- func (idx *VectorIndex) Search(query string, k int) ([]SearchResult, error)
- func (idx *VectorIndex) Size() int
- func (idx *VectorIndex) Stats() VectorIndexStats
- func (idx *VectorIndex) Train(texts []string) error
- type VectorIndexConfig
- type VectorIndexStats
Constants ¶
const ( MaxErrorTextLength = 50 DefaultBatchSize = 256 DefaultTimeout = 30 * time.Second )
Constants for configuration
const ( // Model dimensions Int8EmbeddingDim = 512 Int8VocabSize = 30522 )
Variables ¶
This section is empty.
Functions ¶
func BuildCAGRACachePath ¶
BuildCAGRACachePath generates the cache path used in tests.
func CosineSimilarity ¶
CosineSimilarity calculates cosine similarity between two float32 vectors
func CosineSimilarityFloat64 ¶
CosineSimilarityFloat64 calculates cosine similarity between two float64 vectors
func CosineSimilarityInt8 ¶
CosineSimilarityInt8 computes similarity between INT8 vectors using SIMD
func CosineSimilarityInt8Fallback ¶
CosineSimilarityInt8Fallback is a pure Go fallback for systems without AVX-512
func DebugLoggingEnabled ¶
func DebugLoggingEnabled() bool
DebugLoggingEnabled returns true when debug logging is currently enabled.
func Debugf ¶
func Debugf(format string, args ...interface{})
Debugf emits a formatted debug message when debug logging is enabled.
func Debugln ¶
func Debugln(args ...interface{})
Debugln emits a debug message with default formatting when enabled.
func DisableDebugLogging ¶
func DisableDebugLogging()
DisableDebugLogging globally disables debug logging output.
func EnableDebugLogging ¶
func EnableDebugLogging()
EnableDebugLogging globally enables debug logging output.
func FastQuantize ¶
FastQuantize performs optimized quantization with SIMD hints
func FusedCAGRAAvailable ¶
func FusedCAGRAAvailable() bool
FusedCAGRAAvailable reports whether the CUDA implementation is linked in.
func GetCUDADeviceCount ¶
func GetCUDADeviceCount() int
GetCUDADeviceCount returns 0 for non-GPU builds
func GetCUDAVersion ¶
func GetCUDAVersion() string
GetCUDAVersion returns empty string for non-GPU builds
func GetEmbedBuffer ¶
func GetEmbedBuffer() []float32
GetEmbedBuffer returns a reusable embedding buffer
func GetOptimalBatchSize ¶
func GetOptimalBatchSize() int
GetOptimalBatchSize returns the optimal batch size based on available memory
func GetOptimalGPUBatchSize ¶
func GetOptimalGPUBatchSize() int
GetOptimalGPUBatchSize returns optimal batch size for GPU processing
func GetSearchConfig ¶
func GetSearchConfig(preset SearchPreset, estimatedSize int) search.Config
GetSearchConfig returns the appropriate search configuration for a preset
func LoadModelUnified ¶
func LoadModelUnified(config *UnifiedModelConfig) (interface{}, error)
LoadModelUnified loads the best available model with consistent path handling
func PutEmbedBuffer ¶
func PutEmbedBuffer(buf []float32)
PutEmbedBuffer returns an embedding buffer to the pool
func PutInt8Buffer ¶
func PutInt8Buffer(buf []int8)
PutInt8Buffer returns an int8 buffer to the pool
func PutTokenBuffer ¶
func PutTokenBuffer(buf []int)
PutTokenBuffer returns a token buffer to the pool
func SetDebugOutput ¶
SetDebugOutput overrides the writer used for debug output.
func SetSimpleInt8Verbose ¶
func SetSimpleInt8Verbose(verbose bool)
SetSimpleInt8Verbose controls whether model loading logs are printed
func ZeroCopyInt32ToFloat32 ¶
ZeroCopyInt32ToFloat32 performs a zero-copy conversion of an int32 slice to float32 when possible
Types ¶
type BatchConfig ¶
BatchConfig holds optimized batch processing configuration
type BatchEmbeddingResult ¶
type BatchEmbeddingResult struct {
Embeddings [][]float32
Duration time.Duration
BatchSize int
ItemsPerSec float64
}
BatchEmbeddingResult contains results from batch processing
type BatchProcessor ¶
type BatchProcessor struct {
// contains filtered or unexported fields
}
BatchProcessor provides optimized batch processing
func NewBatchProcessor ¶
func NewBatchProcessor(batchSize, workers int) *BatchProcessor
NewBatchProcessor creates an optimized batch processor
func (*BatchProcessor) ProcessBatch ¶
func (bp *BatchProcessor) ProcessBatch(texts []string, model *EmbeddingModel) ([]simd.Vec512, []float32, error)
ProcessBatch processes documents in optimized batches
type BatchResult ¶
BatchResult represents the result of batch processing
type BufferPool ¶
type BufferPool struct {
// contains filtered or unexported fields
}
BufferPool manages reusable buffers to reduce allocations
type CAGRAConfig ¶
type CAGRAConfig struct {
MaxVectors int
VectorDim int
GraphDegree int
MaxIterations int
CachePath string
TargetLatencyUs int
TargetRecall float32
}
CAGRAConfig captures the subset of configuration fields that tests rely on. The full implementation lives behind the cagra build tag.
func DefaultCAGRAConfig ¶
func DefaultCAGRAConfig() CAGRAConfig
DefaultCAGRAConfig mirrors the exported API so callers do not need build tags.
func FastCAGRAConfig ¶
func FastCAGRAConfig() CAGRAConfig
FastCAGRAConfig returns a speed-optimized configuration without requiring CUDA.
func QualityCAGRAConfig ¶
func QualityCAGRAConfig() CAGRAConfig
QualityCAGRAConfig returns a quality-focused configuration for callers that expect it.
type CPUBulkIndexer ¶
type CPUBulkIndexer struct {
// contains filtered or unexported fields
}
CPUBulkIndexer provides fast bulk indexing using CPU with parallelization
func NewCPUBulkIndexer ¶
func NewCPUBulkIndexer(index *VectorIndex, batchSize int) *CPUBulkIndexer
NewCPUBulkIndexer creates a new CPU bulk indexer
func (*CPUBulkIndexer) IndexBatch ¶
func (idx *CPUBulkIndexer) IndexBatch(docs []Document) error
IndexBatch processes a batch of documents using CPU with parallelization
func (*CPUBulkIndexer) Stats ¶
func (idx *CPUBulkIndexer) Stats() CPUBulkIndexerStats
Stats returns indexing statistics
type CPUBulkIndexerStats ¶
type CPUBulkIndexerStats struct {
TotalIndexed int64
TotalTime time.Duration
BatchSize int
NumWorkers int
Throughput float64
}
CPUBulkIndexerStats contains CPU bulk indexing statistics
func (CPUBulkIndexerStats) LogStats ¶
func (stats CPUBulkIndexerStats) LogStats()
LogStats logs the indexing statistics
type CachedEmbedding ¶
type CachedEmbedding struct {
Vector []float32
VectorI8 []int8 // Quantized version
Scale float32 // Quantization scale
UseCount uint32 // Track usage for cache eviction
LastUsed int64 // Unix timestamp
}
CachedEmbedding stores a precomputed embedding
type EmbedInt8Result ¶
EmbedInt8Result represents an int8 quantized embedding
type EmbeddingModel ¶
EmbeddingModel provides a clean API for text embeddings using the real static-retrieval-mrl-en-v1 model
func LoadModel ¶
func LoadModel() (*EmbeddingModel, error)
LoadModel loads the real static-retrieval-mrl-en-v1 embedding model
func (*EmbeddingModel) EmbedInt8 ¶
func (m *EmbeddingModel) EmbedInt8(text string) (*EmbedInt8Result, error)
EmbedInt8 generates int8 quantized embeddings
func (*EmbeddingModel) Encode ¶
func (m *EmbeddingModel) Encode(text string) ([]float32, error)
Encode converts text to embedding vector using real model weights
func (*EmbeddingModel) FindMostSimilar ¶
func (m *EmbeddingModel) FindMostSimilar(query string, candidates []string, limit int) ([]SimilarityResult, error)
FindMostSimilar finds the most similar texts to a query from a list of candidates
func (*EmbeddingModel) GetAvailableTexts ¶
func (m *EmbeddingModel) GetAvailableTexts() []string
GetAvailableTexts returns all texts that can be encoded (from reference tokens)
func (*EmbeddingModel) OptimizedEmbedding ¶
func (m *EmbeddingModel) OptimizedEmbedding(text string, cache *TokenPatternCache) ([]float32, error)
OptimizedEmbedding wraps the embedding computation with caching
func (*EmbeddingModel) Similarity ¶
func (m *EmbeddingModel) Similarity(text1, text2 string) (float32, error)
Similarity calculates cosine similarity between two texts
type EmbeddingModelInt8 ¶
type EmbeddingModelInt8 struct {
VocabSize int
EmbedDim int
// contains filtered or unexported fields
}
EmbeddingModelInt8 provides INT8 quantized embeddings with SIMD acceleration
func LoadModelInt8 ¶
func LoadModelInt8(useInt8 bool) (*EmbeddingModelInt8, error)
LoadModelInt8 loads the model with INT8 quantization support
func (*EmbeddingModelInt8) ComputeEmbeddingFromTokens ¶
func (m *EmbeddingModelInt8) ComputeEmbeddingFromTokens(tokenIDs []int) ([]uint8, error)
ComputeEmbeddingFromTokens computes INT8 embedding from token IDs
type FusedCAGRAConfig ¶
FusedCAGRAConfig configures the fused CAGRA engine.
func DefaultFusedCAGRAConfig ¶
func DefaultFusedCAGRAConfig() FusedCAGRAConfig
DefaultFusedCAGRAConfig returns the default configuration used by production builds.
type FusedCAGRAEngine ¶
type FusedCAGRAEngine struct {
// contains filtered or unexported fields
}
FusedCAGRAEngine provides a CPU fallback that mimics the fused GPU pipeline when CUDA libraries are unavailable.
func NewFusedCAGRAEngine ¶
func NewFusedCAGRAEngine(config FusedCAGRAConfig) (*FusedCAGRAEngine, error)
NewFusedCAGRAEngine creates a CPU-backed fused engine when GPU kernels are not available.
func (*FusedCAGRAEngine) BuildIndex ¶
func (engine *FusedCAGRAEngine) BuildIndex( embedWeights []int8, embedScales []float32, database []simd.Vec512, dbScales []float32, ) error
BuildIndex loads quantized embeddings and prepares the CPU search buffers.
func (*FusedCAGRAEngine) Close ¶
func (engine *FusedCAGRAEngine) Close()
Close releases any CPU resources (no-op for fallback mode).
func (*FusedCAGRAEngine) GetStats ¶
func (engine *FusedCAGRAEngine) GetStats() FusedCAGRAStats
GetStats returns statistics collected during CPU fallback execution.
func (*FusedCAGRAEngine) Search ¶
func (engine *FusedCAGRAEngine) Search(tokens []uint16) ([]SearchResult, error)
Search runs a single-query search through the CPU fallback.
func (*FusedCAGRAEngine) SearchBatch ¶
func (engine *FusedCAGRAEngine) SearchBatch(tokenBatch [][]uint16, maxTokens int) ([][]SearchResult, error)
SearchBatch executes a CPU implementation of the fused pipeline.
type FusedCAGRAStats ¶
type FusedCAGRAStats struct {
VocabSize int
EmbedDim int
NumVectors int
TopK int
AvgSearchTimeMs float64
SearchCount int64
IsBuilt bool
}
FusedCAGRAStats mirrors the GPU implementation statistics payload.
type GPUBatchProcessor ¶
type GPUBatchProcessor struct {
// contains filtered or unexported fields
}
GPUBatchProcessor stub for non-GPU builds
func NewGPUBatchProcessor ¶
func NewGPUBatchProcessor(model *EmbeddingModel, cache *TokenPatternCache) *GPUBatchProcessor
NewGPUBatchProcessor returns a stub for non-GPU builds
func (*GPUBatchProcessor) GetMetrics ¶
func (p *GPUBatchProcessor) GetMetrics() map[string]interface{}
GetMetrics returns empty metrics for non-GPU builds
func (*GPUBatchProcessor) GetStats ¶
func (p *GPUBatchProcessor) GetStats() map[string]interface{}
GetStats returns empty stats for non-GPU builds
func (*GPUBatchProcessor) ProcessBatch ¶
func (p *GPUBatchProcessor) ProcessBatch(texts []string) ([]*EmbedInt8Result, error)
ProcessBatch returns error for non-GPU builds
func (*GPUBatchProcessor) Shutdown ¶
func (p *GPUBatchProcessor) Shutdown()
Shutdown is a no-op for non-GPU builds
type GPUBlockPool ¶
type GPUBlockPool struct {
// contains filtered or unexported fields
}
type GPUCagraConfig ¶
GPUCagraConfig is defined in gpu builds; provide a stub for non-GPU builds
type GPUEmbeddingModel ¶
type GPUEmbeddingModel struct {
*EmbeddingModel
// contains filtered or unexported fields
}
GPUEmbeddingModel provides GPU-accelerated batch embedding processing
func NewGPUEmbeddingModel ¶
func NewGPUEmbeddingModel(batchSize int, useGPU bool) (*GPUEmbeddingModel, error)
NewGPUEmbeddingModel creates a GPU-optimized embedding model
func (*GPUEmbeddingModel) EncodeBatch ¶
func (g *GPUEmbeddingModel) EncodeBatch(texts []string) (*BatchEmbeddingResult, error)
EncodeBatch processes multiple texts in optimized batches
func (*GPUEmbeddingModel) MemoryOptimizedEncodeBatch ¶
func (g *GPUEmbeddingModel) MemoryOptimizedEncodeBatch(texts []string, maxMemoryMB int) (*BatchEmbeddingResult, error)
MemoryOptimizedEncodeBatch processes with memory efficiency
func (*GPUEmbeddingModel) OptimalBatchSize ¶
func (g *GPUEmbeddingModel) OptimalBatchSize() int
OptimalBatchSize determines the best batch size for the current hardware
type GPUIndexer ¶
type GPUIndexer struct {
// contains filtered or unexported fields
}
GPUIndexer stub for non-GPU builds
func NewGPUIndexer ¶
func NewGPUIndexer(config IndexConfig) (*GPUIndexer, error)
NewGPUIndexer returns error for non-GPU builds
func (*GPUIndexer) AddVectors ¶
func (g *GPUIndexer) AddVectors(vectors [][]int8) error
AddVectors returns error for non-GPU builds
func (*GPUIndexer) BatchSearch ¶
func (g *GPUIndexer) BatchSearch(queries [][]int8, k int) ([][]SearchResult, error)
BatchSearch returns error for non-GPU builds
func (*GPUIndexer) GetMemoryUsage ¶
func (g *GPUIndexer) GetMemoryUsage() uint64
GetMemoryUsage returns 0 for non-GPU builds
func (*GPUIndexer) GetStats ¶
func (g *GPUIndexer) GetStats() IndexStats
GetStats returns empty stats for non-GPU builds
func (*GPUIndexer) IndexVectors ¶
func (g *GPUIndexer) IndexVectors(vectors []simd.Vec512, scales []float32) error
IndexVectors returns error for non-GPU builds
func (*GPUIndexer) Initialize ¶
func (g *GPUIndexer) Initialize() error
Initialize returns error for non-GPU builds
func (*GPUIndexer) IsReady ¶
func (g *GPUIndexer) IsReady() bool
IsReady returns false for non-GPU builds
func (*GPUIndexer) TrainIndex ¶
func (g *GPUIndexer) TrainIndex(vectors [][]int8) error
TrainIndex returns error for non-GPU builds
type GPUMemoryConfig ¶
type GPUMemoryConfig struct {
DeviceID int
MaxMemoryUsagePercent float64
VectorPoolBlockSize uint64
QueryPoolBlockSize uint64
ResultPoolBlockSize uint64
MaxVectorBlocks int
MaxQueryBlocks int
MaxResultBlocks int
ReserveMemoryMB uint64
}
func DefaultGPUMemoryConfig ¶
func DefaultGPUMemoryConfig() GPUMemoryConfig
type GPUMemoryManager ¶
type GPUMemoryManager struct {
// contains filtered or unexported fields
}
func NewGPUMemoryManager ¶
func NewGPUMemoryManager(config GPUMemoryConfig) (*GPUMemoryManager, error)
func (*GPUMemoryManager) AllocateQueryMemory ¶
func (m *GPUMemoryManager) AllocateQueryMemory() (unsafe.Pointer, error)
func (*GPUMemoryManager) AllocateResultMemory ¶
func (m *GPUMemoryManager) AllocateResultMemory() (unsafe.Pointer, error)
func (*GPUMemoryManager) AllocateVectorMemory ¶
func (m *GPUMemoryManager) AllocateVectorMemory() (unsafe.Pointer, error)
func (*GPUMemoryManager) Close ¶
func (m *GPUMemoryManager) Close() error
func (*GPUMemoryManager) ForceGarbageCollection ¶
func (m *GPUMemoryManager) ForceGarbageCollection()
func (*GPUMemoryManager) FreeQueryMemory ¶
func (m *GPUMemoryManager) FreeQueryMemory(ptr unsafe.Pointer)
func (*GPUMemoryManager) FreeResultMemory ¶
func (m *GPUMemoryManager) FreeResultMemory(ptr unsafe.Pointer)
func (*GPUMemoryManager) FreeVectorMemory ¶
func (m *GPUMemoryManager) FreeVectorMemory(ptr unsafe.Pointer)
func (*GPUMemoryManager) GetMemoryStats ¶
func (m *GPUMemoryManager) GetMemoryStats() GPUMemoryStats
func (*GPUMemoryManager) StartMemoryMonitor ¶
func (m *GPUMemoryManager) StartMemoryMonitor(interval time.Duration)
type GPUMemoryStats ¶
type GPUSearchServer ¶
type GPUSearchServer struct {
// contains filtered or unexported fields
}
GPUSearchServer provides a high-performance CUDA-accelerated HTTP search server
func NewGPUSearchServer ¶
func NewGPUSearchServer(model *EmbeddingModel, config GPUServerConfig) (*GPUSearchServer, error)
NewGPUSearchServer creates a new CUDA-accelerated search server
func (*GPUSearchServer) Start ¶
func (s *GPUSearchServer) Start() error
Start starts the GPU-accelerated search server
func (*GPUSearchServer) Stop ¶
func (s *GPUSearchServer) Stop() error
Stop gracefully stops the GPU search server
type GPUServerConfig ¶
type GPUServerConfig struct {
Port int
MaxVectors int
MaxConcurrency int
EnableProfiling bool
EnableMetrics bool
ReadOnly bool
PreloadEmbeddings bool
WorkerThreads int
// GPU-specific configuration
GPUDeviceID int
GPUBatchSize int
EnableGPUFallback bool
GPUMemoryLimitMB int
IndexingBatchSize int
}
GPUServerConfig configures the GPU-accelerated search server
func DefaultGPUServerConfig ¶
func DefaultGPUServerConfig() GPUServerConfig
DefaultGPUServerConfig returns optimized GPU server configuration
type GPUStats ¶
type GPUStats struct {
DeviceID int
MemoryUsed int64
MemoryTotal int64
Utilization float32
Temperature float32
}
GPUStats represents GPU statistics
type IndexComparison ¶
type IndexComparison struct {
NumDocuments int
SequentialTime time.Duration
AsyncTime time.Duration
ParallelTime time.Duration
GPUTime time.Duration
SequentialError error
AsyncError error
ParallelError error
GPUError error
AsyncSpeedup float64
ParallelSpeedup float64
GPUSpeedup float64
}
IndexComparison contains comparison results
type IndexConfig ¶
type IndexConfig struct {
VectorDim int
NumSubquantizers int
CodebookSize int
IVFClusters int
ProbeLists int
RerankK int
DeviceID int
}
IndexConfig stub
func DefaultGPUConfig ¶
func DefaultGPUConfig() IndexConfig
DefaultGPUConfig returns a default configuration for GPU indexing (stub)
type IndexData ¶
type IndexData struct {
// Core index data
Vectors [][]float32 `json:"-"` // Skip in JSON, too large
VectorsBinary []byte `json:"vectors_binary,omitempty"`
IDs []int `json:"ids"`
// Index state
IndexType string `json:"index_type"`
Trained bool `json:"trained"`
// Stats
MemoryUsageMB float64 `json:"memory_usage_mb"`
}
IndexData contains the serializable index structures
type IndexProgress ¶
type IndexProgress struct {
Current int
Total int
Percentage float64
DocsPerSec float64
TimeLeft time.Duration
}
IndexProgress represents indexing progress
type IndexRequest ¶
type IndexRequest struct {
IDs []int
Texts []string
Response chan IndexResponse
Context context.Context
}
IndexRequest represents an async indexing request
type IndexResponse ¶
type IndexResponse struct {
IDs []int
Error error
Stats IndexingStats
}
IndexResponse contains the result of async indexing
type IndexSnapshot ¶
type IndexSnapshot struct {
Version string `json:"version"`
CreatedAt time.Time `json:"created_at"`
NumDocuments int `json:"num_documents"`
Config SearchConfig `json:"config"`
Documents map[int]string `json:"documents"`
IndexData *IndexData `json:"index_data,omitempty"`
Metadata map[string]interface{} `json:"metadata,omitempty"`
}
IndexSnapshot represents a serializable snapshot of a SearchEngine
type IndexStats ¶
type IndexStats struct {
NumVectors int
VectorDim int
IVFClusters int
PQSubquantizers int
GPUMemoryMB float32
IsTrained bool
IndexBuilt bool
}
IndexStats provides indexer statistics
type IndexingStats ¶
type IndexingStats struct {
DocumentsProcessed int
ProcessingTime time.Duration
EmbeddingTime time.Duration
IndexingTime time.Duration
}
IndexingStats provides indexing performance metrics
type Int8EmbeddingModel512 ¶
type Int8EmbeddingModel512 struct {
// contains filtered or unexported fields
}
Int8EmbeddingModel512 represents the int8 quantized model with 512 dimensions
func LoadFastModel ¶
func LoadFastModel() (*Int8EmbeddingModel512, error)
LoadFastModel is a convenience function that always loads the fastest model
func LoadInt8Model512 ¶
func LoadInt8Model512() (*Int8EmbeddingModel512, error)
LoadInt8Model512 loads the int8 quantized model with 512 dimensions
func (*Int8EmbeddingModel512) Embed ¶
func (m *Int8EmbeddingModel512) Embed(text string) ([]float32, error)
Embed embeds text using int8 model
func (*Int8EmbeddingModel512) EmbedInt8 ¶
func (m *Int8EmbeddingModel512) EmbedInt8(text string) (*Int8Result512, error)
EmbedInt8 returns int8 quantized embedding
func (*Int8EmbeddingModel512) EmbedTokens ¶
func (m *Int8EmbeddingModel512) EmbedTokens(tokens []int16) ([]float32, error)
EmbedTokens embeds int16 token IDs directly (no tokenization needed)
func (*Int8EmbeddingModel512) GetInt8Weights ¶
func (m *Int8EmbeddingModel512) GetInt8Weights() ([]int8, []float32)
GetInt8Weights returns flattened embedding weights and scales for GPU use
func (*Int8EmbeddingModel512) GetMemoryUsage ¶
func (m *Int8EmbeddingModel512) GetMemoryUsage() string
GetMemoryUsage returns the memory usage of the model
func (*Int8EmbeddingModel512) Similarity ¶
func (m *Int8EmbeddingModel512) Similarity(text1, text2 string) (float32, error)
Similarity computes cosine similarity between two texts using int8 embeddings
type Int8Result512 ¶
type Int8Result512 struct {
Vector []int8 // 512-dimensional int8 vector
Scale float32 // Scale factor for dequantization
}
Int8Result512 represents the result with int8 vector and scale
type MemoryOptimizedCache ¶
type MemoryOptimizedCache struct {
// contains filtered or unexported fields
}
MemoryOptimizedCache provides a cache with memory management
func NewMemoryOptimizedCache ¶
func NewMemoryOptimizedCache(maxSize int) *MemoryOptimizedCache
NewMemoryOptimizedCache creates a memory-optimized embedding cache
func (*MemoryOptimizedCache) Get ¶
func (c *MemoryOptimizedCache) Get(text string) (*EmbedInt8Result, bool)
Get retrieves an embedding from cache
func (*MemoryOptimizedCache) Put ¶
func (c *MemoryOptimizedCache) Put(text string, embedding *EmbedInt8Result)
Put stores an embedding in cache
func (*MemoryOptimizedCache) Size ¶
func (c *MemoryOptimizedCache) Size() int
Size returns current cache size
type ModelCompatibilityWrapper ¶
type ModelCompatibilityWrapper struct {
// contains filtered or unexported fields
}
ModelCompatibilityWrapper provides a common interface for both model types
func LoadCompatibleModel ¶
func LoadCompatibleModel() (*ModelCompatibilityWrapper, error)
LoadCompatibleModel loads a model that works with all existing code
func (*ModelCompatibilityWrapper) Encode ¶
func (w *ModelCompatibilityWrapper) Encode(text string) ([]float32, error)
Encode provides a unified interface for both model types
func (*ModelCompatibilityWrapper) EncodeInt8 ¶
func (w *ModelCompatibilityWrapper) EncodeInt8(text string) ([]int8, error)
EncodeInt8 provides fast int8 encoding when available
type ObjectPool ¶
type ObjectPool struct {
// contains filtered or unexported fields
}
ObjectPool provides reusable object pools to reduce allocations
func (*ObjectPool) GetEmbedding ¶
func (p *ObjectPool) GetEmbedding() *EmbedInt8Result
GetEmbedding gets a reusable embedding result from pool
func (*ObjectPool) GetSlice ¶
func (p *ObjectPool) GetSlice() *[]float32
GetSlice gets a reusable float32 slice from pool
func (*ObjectPool) GetVector ¶
func (p *ObjectPool) GetVector() *simd.Vec512
GetVector gets a reusable vector from pool
func (*ObjectPool) PutEmbedding ¶
func (p *ObjectPool) PutEmbedding(emb *EmbedInt8Result)
PutEmbedding returns embedding to pool
func (*ObjectPool) PutSlice ¶
func (p *ObjectPool) PutSlice(slice *[]float32)
PutSlice returns slice to pool
func (*ObjectPool) PutVector ¶
func (p *ObjectPool) PutVector(vec *simd.Vec512)
PutVector returns vector to pool
type OptimizedEmbeddingModel ¶
type OptimizedEmbeddingModel struct {
*EmbeddingModel
// contains filtered or unexported fields
}
OptimizedEmbeddingModel extends the base model with caching and batch processing
func LoadOptimizedModel ¶
func LoadOptimizedModel() (*OptimizedEmbeddingModel, error)
LoadOptimizedModel loads the model with all optimizations enabled
func (*OptimizedEmbeddingModel) BatchEmbed ¶
func (m *OptimizedEmbeddingModel) BatchEmbed(texts []string) ([]*EmbedInt8Result, error)
BatchEmbed processes multiple texts efficiently
func (*OptimizedEmbeddingModel) EmbedInt8Optimized ¶
func (m *OptimizedEmbeddingModel) EmbedInt8Optimized(text string) (*EmbedInt8Result, error)
EmbedInt8Optimized generates quantized embeddings with optimizations
func (*OptimizedEmbeddingModel) EmbedOptimized ¶
func (m *OptimizedEmbeddingModel) EmbedOptimized(text string) ([]float32, error)
EmbedOptimized generates embeddings with all optimizations
func (*OptimizedEmbeddingModel) FastSearch ¶
func (m *OptimizedEmbeddingModel) FastSearch(query string, limit int) ([]float32, error)
FastSearch performs optimized search for AI images
func (*OptimizedEmbeddingModel) GetStats ¶
func (m *OptimizedEmbeddingModel) GetStats() map[string]interface{}
GetStats returns performance statistics
func (*OptimizedEmbeddingModel) OptimizeForProduction ¶
func (m *OptimizedEmbeddingModel) OptimizeForProduction(maxCacheSize int, gpuEnabled bool)
OptimizeForProduction applies production-ready optimizations
func (*OptimizedEmbeddingModel) PrecomputePatterns ¶
func (m *OptimizedEmbeddingModel) PrecomputePatterns(patterns []string)
PrecomputePatterns adds new patterns to the cache
func (*OptimizedEmbeddingModel) WarmupCache ¶
func (m *OptimizedEmbeddingModel) WarmupCache()
WarmupCache preloads common search patterns
type ParallelIndexConfig ¶
type ParallelIndexConfig struct {
NumWorkers int // Number of parallel workers (default: NumCPU)
BatchSize int // Batch size for processing (default: 100)
EnableCache bool // Enable embedding cache (default: true)
QueueSize int // Task queue size (default: 1000)
}
ParallelIndexConfig configures parallel indexing
func DefaultParallelIndexConfig ¶
func DefaultParallelIndexConfig() ParallelIndexConfig
DefaultParallelIndexConfig returns optimized configuration
type ParallelIndexStats ¶
type ParallelIndexStats struct {
TotalIndexed uint64
TotalTime time.Duration
Errors uint32
NumWorkers int
BatchSize int
DocsPerSec float64
}
ParallelIndexStats contains parallel indexing statistics
type ParallelIndexer ¶
type ParallelIndexer struct {
// contains filtered or unexported fields
}
ParallelIndexer provides high-performance parallel indexing
func NewParallelIndexer ¶
func NewParallelIndexer(engine *SearchEngine, config ParallelIndexConfig) *ParallelIndexer
NewParallelIndexer creates a new parallel indexer
func (*ParallelIndexer) IndexDocumentsParallel ¶
func (p *ParallelIndexer) IndexDocumentsParallel(texts []string) ([]int, error)
IndexDocumentsParallel indexes documents using parallel processing
func (*ParallelIndexer) IndexWithProgress ¶
func (p *ParallelIndexer) IndexWithProgress(texts []string) (<-chan IndexProgress, error)
IndexWithProgress indexes documents with progress reporting
func (*ParallelIndexer) OptimizeWorkers ¶
func (p *ParallelIndexer) OptimizeWorkers(testDocs []string) (int, error)
OptimizeWorkers finds the optimal number of workers
func (*ParallelIndexer) Stats ¶
func (p *ParallelIndexer) Stats() ParallelIndexStats
Stats returns parallel indexer statistics
type ParallelProcessor ¶
type ParallelProcessor struct {
// contains filtered or unexported fields
}
ParallelProcessor handles parallel batch processing with controlled concurrency
func NewParallelProcessor ¶
func NewParallelProcessor() *ParallelProcessor
NewParallelProcessor creates a processor with optimal worker count
func (*ParallelProcessor) Close ¶
func (p *ParallelProcessor) Close()
Close shuts down the processor
func (*ParallelProcessor) ProcessBatch ¶
func (p *ParallelProcessor) ProcessBatch(items []func())
ProcessBatch processes items in parallel with controlled concurrency
type ParallelSearchEngine ¶
type ParallelSearchEngine struct {
*SearchEngine
// contains filtered or unexported fields
}
ParallelSearchEngine extends SearchEngine with parallel capabilities
func NewParallelSearchEngine ¶
func NewParallelSearchEngine(model *EmbeddingModel, config SearchConfig) *ParallelSearchEngine
NewParallelSearchEngine creates a search engine with parallel indexing
func (*ParallelSearchEngine) IndexBatchParallel ¶
func (e *ParallelSearchEngine) IndexBatchParallel(texts []string) ([]int, error)
IndexBatchParallel indexes documents using CPU parallelization
func (*ParallelSearchEngine) IndexBatchWithComparison ¶
func (e *ParallelSearchEngine) IndexBatchWithComparison(texts []string) (*IndexComparison, error)
IndexBatchWithComparison compares different indexing methods
type PersistenceFormat ¶
type PersistenceFormat string
PersistenceFormat represents the format for saving/loading indexes
const ( // FormatBinary uses Go's gob encoding (fastest, Go-specific) FormatBinary PersistenceFormat = "binary" // FormatJSON uses JSON encoding (portable but slower) FormatJSON PersistenceFormat = "json" )
type PersistenceStats ¶
type PersistenceStats struct {
LastSaved time.Time
SaveCount int
LoadCount int
LastLoadTime time.Duration
LastSaveTime time.Duration
}
PersistenceStats contains statistics about saved indexes
func GetPersistenceStats ¶
func GetPersistenceStats() PersistenceStats
GetPersistenceStats returns persistence statistics
type PrecomputedEmbeddings ¶
type PrecomputedEmbeddings struct {
Single map[string][]float32 `json:"single"`
Bigram map[string][]float32 `json:"bigram"`
Trigram map[string][]float32 `json:"trigram"`
Fourgram map[string][]float32 `json:"fourgram"`
}
PrecomputedEmbeddings stores actual embedding vectors
type PresetConfig ¶
type PresetConfig struct {
Preset SearchPreset
DatasetSize int // Estimated number of vectors
}
PresetConfig contains simplified configuration options
type SaveOptions ¶
type SaveOptions struct {
Format PersistenceFormat
Compress bool
IncludeTexts bool
Metadata map[string]interface{}
}
SaveOptions configures how the index is saved
func DefaultSaveOptions ¶
func DefaultSaveOptions() SaveOptions
DefaultSaveOptions returns recommended save options
type SearchConfig ¶
type SearchConfig struct {
// Automatic mode - let the engine choose optimal settings
AutoMode bool
// Preset configuration (when AutoMode is true)
Preset SearchPreset // Use predefined configuration preset
// Manual configuration (when AutoMode is false)
MaxExactSearchSize int // Use exact search below this size (default: 50000)
NumClusters int // Number of IVF clusters (default: auto)
SearchClusters int // Number of clusters to search (default: auto)
UseCompression bool // Use PQ compression for large datasets (default: auto)
UseGraphRouting bool // Use HNSW for centroid routing (default: auto)
CandidatesToRerank int // Number of candidates to rerank (default: auto)
// Async configuration
EnableAsync bool // Enable async indexing (default: false)
AsyncWorkers int // Number of async workers (default: 4)
AsyncQueueSize int // Size of async queue (default: 1000)
MaxConcurrency int // Maximum concurrent operations (default: runtime.NumCPU())
// GPU acceleration configuration
EnableGPU bool // Enable GPU acceleration for similarity search (default: false)
GPUDeviceID int // CUDA device ID to use (default: 0)
GPUBatchSize int // Batch size for GPU operations (default: 1000)
UseInt8 bool // Use int8 quantization for embeddings (75% memory savings)
}
SearchConfig configures the search engine
func AsyncSearchConfig ¶
func AsyncSearchConfig() SearchConfig
AsyncSearchConfig returns configuration optimized for async processing
func AutoOptimizedSearchConfig ¶
func AutoOptimizedSearchConfig() SearchConfig
AutoOptimizedSearchConfig returns the best configuration based on available hardware. It automatically detects and uses GPU acceleration when available.
func DefaultSearchConfig ¶
func DefaultSearchConfig() SearchConfig
DefaultSearchConfig returns optimized default configuration. It automatically detects and enables GPU with CAGRA when available.
func GPUSearchConfig ¶
func GPUSearchConfig() SearchConfig
GPUSearchConfig returns configuration optimized for GPU acceleration with CAGRA
type SearchEngine ¶
type SearchEngine struct {
// contains filtered or unexported fields
}
SearchEngine provides a high-level API for vector search
func FastSearchEngine ¶
func FastSearchEngine(model *EmbeddingModel) *SearchEngine
FastSearchEngine is an alias for NewAutoSearchEngine for backward compatibility
func NewAsyncSearchEngine ¶
func NewAsyncSearchEngine(model *EmbeddingModel) *SearchEngine
NewAsyncSearchEngine creates a search engine optimized for async operations
func NewAutoSearchEngine ¶
func NewAutoSearchEngine(model *EmbeddingModel) *SearchEngine
NewAutoSearchEngine creates a search engine with automatic hardware optimization. This is the recommended way to create a search engine - it will automatically use GPU acceleration if available, or optimized CPU settings otherwise.
func NewCAGRASearchEngine ¶
func NewCAGRASearchEngine(model *EmbeddingModel) *SearchEngine
NewCAGRASearchEngine creates a CAGRA-powered search engine for ultra-fast search
func NewGPUSearchEngine ¶
func NewGPUSearchEngine(model *EmbeddingModel) *SearchEngine
NewGPUSearchEngine creates a GPU-accelerated search engine with CAGRA
func NewSearchEngine ¶
func NewSearchEngine(model *EmbeddingModel) *SearchEngine
NewSearchEngine creates a new search engine. It automatically uses GPU acceleration if available for a 39x performance boost.
func NewSearchEngineWithConfig ¶
func NewSearchEngineWithConfig(model *EmbeddingModel, config SearchConfig) *SearchEngine
NewSearchEngineWithConfig creates a search engine with custom configuration
func NewSearchEngineWithPreset ¶
func NewSearchEngineWithPreset(model *EmbeddingModel, preset SearchPreset) (*SearchEngine, error)
NewSearchEngineWithPreset creates a search engine with a preset configuration
func (*SearchEngine) AutoSave ¶
func (se *SearchEngine) AutoSave(dir string, interval time.Duration)
AutoSave starts automatic periodic saving
func (*SearchEngine) Checkpoint ¶
func (se *SearchEngine) Checkpoint(dir string) error
Checkpoint creates a checkpoint of the current index state
func (*SearchEngine) Close ¶
func (se *SearchEngine) Close() error
Close shuts down the search engine and stops async workers
func (*SearchEngine) FindSimilar ¶
func (se *SearchEngine) FindSimilar(documentID int, k int) ([]SearchResult, error)
FindSimilar finds documents similar to a given document ID
func (*SearchEngine) Flush ¶
func (se *SearchEngine) Flush() error
Flush waits for all pending async indexing operations to complete
func (*SearchEngine) GetAllDocuments ¶
func (se *SearchEngine) GetAllDocuments() map[int]string
GetAllDocuments returns all indexed documents
func (*SearchEngine) GetDocument ¶
func (se *SearchEngine) GetDocument(id int) (string, bool)
GetDocument retrieves a document by ID
func (*SearchEngine) Index ¶
func (se *SearchEngine) Index(text string) (int, error)
Index adds and indexes a single text with auto-generated ID
func (*SearchEngine) IndexBatch ¶
func (se *SearchEngine) IndexBatch(texts []string) ([]int, error)
IndexBatch efficiently indexes multiple texts
func (*SearchEngine) IndexBatchAsync ¶
func (se *SearchEngine) IndexBatchAsync(texts []string) <-chan IndexResponse
IndexBatchAsync asynchronously indexes multiple texts and returns a channel for the result
func (*SearchEngine) IndexBatchAsyncWithIDs ¶
func (se *SearchEngine) IndexBatchAsyncWithIDs(ids []int, texts []string) <-chan IndexResponse
IndexBatchAsyncWithIDs asynchronously indexes texts with specific IDs
func (*SearchEngine) IndexBatchWithIDs ¶
func (se *SearchEngine) IndexBatchWithIDs(ids []int, texts []string) error
IndexBatchWithIDs efficiently indexes multiple texts with specific IDs
func (*SearchEngine) IndexWithID ¶
func (se *SearchEngine) IndexWithID(id int, text string) error
IndexWithID adds and indexes a text with a specific ID
func (*SearchEngine) Load ¶
func (se *SearchEngine) Load(path string) error
Load restores a SearchEngine from disk
func (*SearchEngine) LoadFromDirectory ¶
func (se *SearchEngine) LoadFromDirectory(dir string) error
LoadFromDirectory loads the index from a directory
func (*SearchEngine) Optimize ¶
func (se *SearchEngine) Optimize() error
Optimize rebuilds the index with optimal parameters for current data
func (*SearchEngine) QuickSave ¶
func (se *SearchEngine) QuickSave(path string) error
QuickSave saves the index with default options
func (*SearchEngine) Save ¶
func (se *SearchEngine) Save(path string, options SaveOptions) error
Save persists the SearchEngine to disk
func (*SearchEngine) SaveToDirectory ¶
func (se *SearchEngine) SaveToDirectory(dir string, options SaveOptions) error
SaveToDirectory saves the index to a directory with metadata
func (*SearchEngine) Search ¶
func (se *SearchEngine) Search(query string, k int) ([]SearchResult, error)
Search performs semantic search and returns top K results
func (*SearchEngine) SearchWithOptions ¶
func (se *SearchEngine) SearchWithOptions(query string, opts SearchOptions) ([]SearchResult, error)
SearchWithOptions performs search with advanced options
func (*SearchEngine) Size ¶
func (se *SearchEngine) Size() int
Size returns the number of indexed documents
func (*SearchEngine) Stats ¶
func (se *SearchEngine) Stats() SearchEngineStats
Stats returns search engine statistics
type SearchEngineStats ¶
type SearchEngineStats struct {
NumDocuments int
IndexType string
MemoryUsageMB float64
Initialized bool
IndexDetails map[string]interface{}
}
SearchEngineStats contains engine statistics
type SearchOptions ¶
type SearchOptions struct {
TopK int // Number of results to return
MinSimilarity float32 // Minimum similarity threshold (0-1)
MaxDistance float32 // Maximum distance threshold
IncludeVectors bool // Include embedding vectors in results
}
SearchOptions provides advanced search options
type SearchPreset ¶
type SearchPreset int
SearchPreset represents predefined search configurations
const ( // FastPreset prioritizes speed for small datasets (<50K vectors) FastPreset SearchPreset = iota // BalancedPreset balances speed and accuracy for medium datasets (50K-500K vectors) BalancedPreset // AccuratePreset prioritizes accuracy for large datasets (>500K vectors) AccuratePreset // CAGRAPreset uses NVIDIA CAGRA for ultra-fast search (sub-millisecond latency) CAGRAPreset // CustomPreset allows manual configuration CustomPreset )
type SearchRequest ¶
type SearchRequest struct {
Query string `json:"query"`
Queries []string `json:"queries,omitempty"` // Batch search
K int `json:"k"`
Timeout int `json:"timeout_ms,omitempty"`
RequestID string `json:"request_id,omitempty"`
}
SearchRequest represents a search API request
type SearchResponse ¶
type SearchResponse struct {
Results []SearchResult `json:"results,omitempty"`
Batch [][]SearchResult `json:"batch,omitempty"`
Latency int64 `json:"latency_us"`
RequestID string `json:"request_id,omitempty"`
Error string `json:"error,omitempty"`
}
SearchResponse represents a search API response
type SearchResult ¶
SearchResult represents a search result with similarity score
type SearchServer ¶
type SearchServer struct {
// contains filtered or unexported fields
}
SearchServer provides a high-performance HTTP search server with shared memory
func NewSearchServer ¶
func NewSearchServer(model *EmbeddingModel, config ServerConfig) (*SearchServer, error)
NewSearchServer creates a new high-performance search server
type ServerConfig ¶
type ServerConfig struct {
Port int
MaxVectors int
MaxConcurrency int
EnableProfiling bool
EnableMetrics bool
ReadOnly bool
PreloadEmbeddings bool
WorkerThreads int
}
ServerConfig configures the search server
func DefaultServerConfig ¶
func DefaultServerConfig() ServerConfig
DefaultServerConfig returns optimized server configuration
type ServerDocument ¶
ServerDocument represents a document to index
type ServerIndexRequest ¶
type ServerIndexRequest struct {
Documents []ServerDocument `json:"documents"`
Async bool `json:"async,omitempty"`
RequestID string `json:"request_id,omitempty"`
}
ServerIndexRequest represents an indexing API request
type ServerIndexResponse ¶
type ServerIndexResponse struct {
Indexed int `json:"indexed"`
Latency int64 `json:"latency_us"`
RequestID string `json:"request_id,omitempty"`
Error string `json:"error,omitempty"`
}
ServerIndexResponse represents an indexing API response
type SharedIndexHeader ¶
type SharedIndexHeader struct {
// Atomic fields for lock-free reads
// Index configuration
// Memory layout
// Write coordination
// Statistics
}
SharedIndexHeader is the header stored in shared memory
type SharedMemoryConfig ¶
type SharedMemoryConfig struct {
}
SharedMemoryConfig configures shared memory index
type SharedMemoryIndex ¶
type SharedMemoryIndex struct {
// contains filtered or unexported fields
}
SharedMemoryIndex provides zero-copy, cross-process vector search
func NewSharedMemoryIndex ¶
func NewSharedMemoryIndex(config SharedMemoryConfig) (*SharedMemoryIndex, error)
NewSharedMemoryIndex creates a new shared memory index
func (*SharedMemoryIndex) BatchSearch ¶
func (idx *SharedMemoryIndex) BatchSearch(queries []*simd.Vec512, k int) [][]SearchResult
BatchSearch performs multiple searches efficiently
func (*SharedMemoryIndex) Close ¶
func (idx *SharedMemoryIndex) Close() error
Close unmaps memory and closes files
func (*SharedMemoryIndex) GetVector ¶
func (idx *SharedMemoryIndex) GetVector(index int) (*simd.Vec512, error)
GetVector returns a zero-copy reference to a vector
func (*SharedMemoryIndex) ReleaseWriter ¶
func (idx *SharedMemoryIndex) ReleaseWriter()
ReleaseWriter releases writer lock
func (*SharedMemoryIndex) SearchTopK ¶
func (idx *SharedMemoryIndex) SearchTopK(query *simd.Vec512, k int) []SearchResult
SearchTopK performs zero-copy k-NN search directly on shared memory
func (*SharedMemoryIndex) Stats ¶
func (idx *SharedMemoryIndex) Stats() SharedIndexStats
Stats returns index statistics
func (*SharedMemoryIndex) Sync ¶
func (idx *SharedMemoryIndex) Sync() error
Sync forces synchronization to disk
func (*SharedMemoryIndex) TryAcquireWriter ¶
func (idx *SharedMemoryIndex) TryAcquireWriter() bool
TryAcquireWriter attempts to become the writer process
func (*SharedMemoryIndex) WaitForWrites ¶
func (idx *SharedMemoryIndex) WaitForWrites(targetSeq uint64)
WaitForWrites waits for pending writes to complete
type SimilarityResult ¶
SimilarityResult represents a similarity comparison
type SimpleInt8Model512 ¶
type SimpleInt8Model512 struct {
// contains filtered or unexported fields
}
SimpleInt8Model512 is a simple version that works without external C deps
func LoadSimpleInt8Model512 ¶
func LoadSimpleInt8Model512() (*SimpleInt8Model512, error)
LoadSimpleInt8Model512 loads the int8 model with built-in tokenizer (singleton)
func (*SimpleInt8Model512) Close ¶
func (m *SimpleInt8Model512) Close() error
Close is provided for API compatibility with heavier model implementations.
func (*SimpleInt8Model512) Embed ¶
func (m *SimpleInt8Model512) Embed(text string) ([]float32, error)
Embed embeds text using simple int8 model
func (*SimpleInt8Model512) EmbedBatchInt8 ¶
func (m *SimpleInt8Model512) EmbedBatchInt8(texts []string) ([]*Int8Result512, error)
EmbedBatchInt8 efficiently processes multiple texts in a single call
func (*SimpleInt8Model512) EmbedBatchInt8Optimized ¶
func (m *SimpleInt8Model512) EmbedBatchInt8Optimized(texts []string, progressCallback func(processed, total int)) ([]*Int8Result512, error)
EmbedBatchInt8Optimized provides the fastest batch processing with memory optimization
func (*SimpleInt8Model512) EmbedDim ¶
func (m *SimpleInt8Model512) EmbedDim() int
EmbedDim returns the embedding dimensionality for API compatibility.
func (*SimpleInt8Model512) EmbedFast ¶
func (m *SimpleInt8Model512) EmbedFast(text string) ([]float32, func())
EmbedFast is the zero-allocation embedding path using buffer pools
func (*SimpleInt8Model512) EmbedInt8 ¶
func (m *SimpleInt8Model512) EmbedInt8(text string) (*Int8Result512, error)
EmbedInt8 returns int8 quantized embedding
func (*SimpleInt8Model512) EmbedTokens ¶
func (m *SimpleInt8Model512) EmbedTokens(tokens []int16) ([]float32, error)
EmbedTokens embeds int16 token IDs directly
func (*SimpleInt8Model512) EmbedTokensInt8 ¶
func (m *SimpleInt8Model512) EmbedTokensInt8(tokens []int16) (*Int8Result512, error)
EmbedTokensInt8 converts pre-tokenized input directly into an int8 embedding result.
func (*SimpleInt8Model512) EmbedTokensInto ¶
func (m *SimpleInt8Model512) EmbedTokensInto(tokens []int16, result []float32) int
EmbedTokensInto embeds tokens into a pre-allocated buffer (zero-alloc hot path)
func (*SimpleInt8Model512) EmbedTokensIntoSIMD ¶
func (m *SimpleInt8Model512) EmbedTokensIntoSIMD(tokens []int16, result []float32) int
EmbedTokensIntoSIMD uses AVX2 SIMD for faster embedding accumulation
func (*SimpleInt8Model512) EmbeddingTable ¶
func (m *SimpleInt8Model512) EmbeddingTable() [][]int8
EmbeddingTable returns the vocab embedding matrix (read-only).
func (*SimpleInt8Model512) ScaleTable ¶
func (m *SimpleInt8Model512) ScaleTable() []float32
ScaleTable returns per-token quantization scales (read-only).
func (*SimpleInt8Model512) Similarity ¶
func (m *SimpleInt8Model512) Similarity(text1, text2 string) (float32, error)
Similarity computes cosine similarity between two texts
func (*SimpleInt8Model512) SimpleTokenize ¶
func (m *SimpleInt8Model512) SimpleTokenize(text string) []int16
SimpleTokenize performs basic tokenization (space-separated + subword)
func (*SimpleInt8Model512) VocabSize ¶
func (m *SimpleInt8Model512) VocabSize() int
VocabSize returns the size of the token vocabulary.
type SimplifiedSearchConfig ¶
type SimplifiedSearchConfig struct {
Preset SearchPreset
DatasetSize int
// Optional custom parameters (only used with CustomPreset)
CustomConfig *SearchConfig
}
SimplifiedSearchConfig provides a simpler configuration interface
type TensorInfo ¶
type TensorInfo struct {
Dtype string `json:"dtype"`
Shape []int `json:"shape"`
DataOffsets [2]int64 `json:"data_offsets"`
}
TensorInfo contains safetensors tensor metadata
type TokenFrequencyData ¶
type TokenFrequencyData struct {
TokenizerName string `json:"tokenizer_name"`
VocabSize int `json:"vocab_size"`
Stopwords []int `json:"stopwords"`
SingleTokens TokenFrequencySection `json:"single_tokens"`
Bigrams TokenPatternSection `json:"bigrams"`
Trigrams TokenPatternSection `json:"trigrams"`
Fourgrams TokenPatternSection `json:"fourgrams"`
Stats map[string]int `json:"stats"`
}
TokenFrequencyData represents the frequency analysis results
type TokenFrequencySection ¶
type TokenPatternCache ¶
type TokenPatternCache struct {
// contains filtered or unexported fields
}
TokenPatternCache stores precomputed embeddings for common token patterns
func NewTokenPatternCache ¶
func NewTokenPatternCache(freqFile, embeddingFile string) (*TokenPatternCache, error)
NewTokenPatternCache creates a new cache with precomputed embeddings
func (*TokenPatternCache) BatchGetEmbeddings ¶
func (c *TokenPatternCache) BatchGetEmbeddings(tokenBatches [][]int) ([]*CachedEmbedding, []bool)
BatchGetEmbeddings retrieves embeddings for multiple patterns in parallel
func (*TokenPatternCache) ComputeEmbeddingWithCache ¶
func (c *TokenPatternCache) ComputeEmbeddingWithCache(tokens []int, computeFn func([]int) ([]float32, error)) ([]float32, error)
ComputeEmbeddingWithCache computes embedding using cache where possible
func (*TokenPatternCache) FilterStopwords ¶
func (c *TokenPatternCache) FilterStopwords(tokens []int, textLength int) []int
FilterStopwords removes stopwords from token sequence if text is long
func (*TokenPatternCache) GetCachedEmbedding ¶
func (c *TokenPatternCache) GetCachedEmbedding(tokens []int) (*CachedEmbedding, bool)
GetCachedEmbedding tries to retrieve cached embedding for token pattern
func (*TokenPatternCache) GetStats ¶
func (c *TokenPatternCache) GetStats() map[string]interface{}
GetStats returns cache statistics
func (*TokenPatternCache) PrecomputeCommonPatterns ¶
func (c *TokenPatternCache) PrecomputeCommonPatterns(model *EmbeddingModel, patterns [][]int)
PrecomputeCommonPatterns adds embeddings for common patterns
type TokenPatternSection ¶
type TokenizerOptimizations ¶
type TokenizerOptimizations struct {
// contains filtered or unexported fields
}
TokenizerOptimizations provides fast tokenization helpers
func NewTokenizerOptimizations ¶
func NewTokenizerOptimizations(maxCacheSize int) *TokenizerOptimizations
NewTokenizerOptimizations creates optimized tokenizer wrapper
func (*TokenizerOptimizations) ClearCache ¶
func (t *TokenizerOptimizations) ClearCache()
ClearCache clears the tokenization cache
func (*TokenizerOptimizations) TokenizeCached ¶
func (t *TokenizerOptimizations) TokenizeCached(text string, tokenizeFn func(string) ([]uint32, error)) ([]int, error)
TokenizeCached performs cached tokenization
type UnifiedModelConfig ¶
type UnifiedModelConfig struct {
UseInt8 bool // Use optimized int8 model (recommended)
ModelDir string // Optional custom model directory
ForceFloat32 bool // Force float32 for compatibility (slower)
}
UnifiedModelConfig configures model loading behavior
func DefaultFastConfig ¶
func DefaultFastConfig() *UnifiedModelConfig
DefaultFastConfig returns optimized configuration for maximum performance
type VectorBuffer ¶
type VectorBuffer struct {
// contains filtered or unexported fields
}
VectorBuffer provides a reusable buffer for vector operations
func NewVectorBuffer ¶
func NewVectorBuffer(capacity int) *VectorBuffer
NewVectorBuffer creates a vector buffer with specified capacity
func (*VectorBuffer) Add ¶
func (vb *VectorBuffer) Add(vec simd.Vec512, scale float32)
Add adds a vector to the buffer
func (*VectorBuffer) GetVectors ¶
func (vb *VectorBuffer) GetVectors() ([]simd.Vec512, []float32)
GetVectors returns the current vectors and scales
func (*VectorBuffer) IsFull ¶
func (vb *VectorBuffer) IsFull() bool
IsFull returns true if buffer is at capacity
type VectorIndex ¶
type VectorIndex struct {
// contains filtered or unexported fields
}
VectorIndex provides high-performance vector search capabilities
func NewVectorIndex ¶
func NewVectorIndex(model *EmbeddingModel, config VectorIndexConfig) *VectorIndex
NewVectorIndex creates a new vector index
func (*VectorIndex) AddDocument ¶
func (idx *VectorIndex) AddDocument(doc Document) error
AddDocument adds a document to the index
func (*VectorIndex) AddDocuments ¶
func (idx *VectorIndex) AddDocuments(docs []Document) error
AddDocuments adds multiple documents efficiently
func (*VectorIndex) AddDocumentsBulkGPU ¶
func (idx *VectorIndex) AddDocumentsBulkGPU(docs []Document) error
AddDocumentsBulkGPU forces GPU bulk indexing regardless of size
func (*VectorIndex) AddDocumentsWithMonitoring ¶
func (idx *VectorIndex) AddDocumentsWithMonitoring(docs []Document) (<-chan interface{}, error)
AddDocumentsWithMonitoring adds documents with real-time GPU monitoring
func (*VectorIndex) Search ¶
func (idx *VectorIndex) Search(query string, k int) ([]SearchResult, error)
Search performs similarity search
func (*VectorIndex) Size ¶
func (idx *VectorIndex) Size() int
Size returns the number of indexed documents
func (*VectorIndex) Stats ¶
func (idx *VectorIndex) Stats() VectorIndexStats
Stats returns index statistics
func (*VectorIndex) Train ¶
func (idx *VectorIndex) Train(texts []string) error
Train trains the index on sample data for better performance
type VectorIndexConfig ¶
type VectorIndexConfig struct {
// Index configuration
MaxFlatSize int // Use flat index below this size (default: 50000)
NList int // Number of IVF clusters (default: 4096)
NProbe int // Number of clusters to search (default: 8)
UsePQ bool // Use product quantization (default: true for >100k)
UseHNSW bool // Use HNSW for routing (default: true)
// Search configuration
RerankSize int // Number of candidates to rerank (default: 128)
UseParallel bool // Use parallel search (default: true)
// Bulk indexing configuration
EnableBulkGPU bool // Enable GPU bulk indexing for large datasets
BulkBatchSize int // Batch size for GPU bulk indexing (default: 5000)
}
VectorIndexConfig configures the vector index
func DefaultVectorIndexConfig ¶
func DefaultVectorIndexConfig() VectorIndexConfig
DefaultVectorIndexConfig returns default configuration
Source Files
¶
- cagra_config_stub.go
- cosine_similarity.go
- cpu_bulk_indexer.go
- embedding_cache.go
- fused_cagra_stub.go
- gobed.go
- gobed_int8.go
- gobed_int8_512.go
- gobed_int8_512_simple.go
- gobed_optimized.go
- gpu_batch_processor_stub.go
- gpu_embedding.go
- gpu_indexer_stub.go
- gpu_memory_manager_stub.go
- gpu_search_server.go
- gpu_search_server_adapter.go
- gpu_stubs.go
- gpubridge_stub.go
- logging.go
- model_loader.go
- optimizations.go
- parallel_indexing.go
- performance_optimizations.go
- persistence.go
- search_api.go
- search_auto_gpu.go
- search_config_mapper.go
- search_presets.go
- search_server.go
- shared_memory_index.go
- vector_search.go
Directories
¶
| Path | Synopsis |
|---|---|
|
ann
|
|
|
bed
module
|
|
|
cmd
|
|
|
bed
command
|
|
|
bed-search
command
|
|
|
bedfast
command
|
|
|
gpu_perf_test
command
|
|
|
gpu_scale_benchmark
command
|
|
|
real_data_benchmark
command
|
|
|
test_ndcg
command
|
|
|
internal
|
|
|
benchmarks
command
|
|
|
pkg
|
|