Documentation
¶
Index ¶
- func IsProviderSupported(provider EmbedderProvider) bool
- func NewDocument(content string, metadata map[string]interface{}) *interfaces.Document
- func NewDocumentWithID(id, content string, metadata map[string]interface{}) *interfaces.Document
- func RegisterEmbedderProvider(provider EmbedderProvider, factory EmbedderFactory)
- func UnregisterEmbedderProvider(provider EmbedderProvider)
- type BaseEmbedder
- type BaseReranker
- type BaseRetriever
- func (r *BaseRetriever) Batch(ctx context.Context, queries []string) ([][]*interfaces.Document, error)
- func (r *BaseRetriever) FilterByScore(docs []*interfaces.Document) []*interfaces.Document
- func (r *BaseRetriever) GetRelevantDocuments(ctx context.Context, query string) ([]*interfaces.Document, error)
- func (r *BaseRetriever) Invoke(ctx context.Context, query string) ([]*interfaces.Document, error)
- func (r *BaseRetriever) LimitTopK(docs []*interfaces.Document) []*interfaces.Document
- func (r *BaseRetriever) Pipe(next core.Runnable[[]*interfaces.Document, any]) core.Runnable[string, any]
- func (r *BaseRetriever) Stream(ctx context.Context, query string) (<-chan core.StreamChunk[[]*interfaces.Document], error)
- func (r *BaseRetriever) WithCallbacks(callbacks ...core.Callback) core.Runnable[string, []*interfaces.Document]
- func (r *BaseRetriever) WithConfig(config core.RunnableConfig) core.Runnable[string, []*interfaces.Document]
- type CohereEmbedder
- type CohereEmbedderConfig
- type CohereReranker
- type CompareRankers
- type CrossEncoderReranker
- type DistanceMetric
- type DocumentCollection
- func (dc DocumentCollection) Deduplicate() DocumentCollection
- func (dc DocumentCollection) Filter(predicate func(*interfaces.Document) bool) DocumentCollection
- func (dc DocumentCollection) Len() int
- func (dc DocumentCollection) Less(i, j int) bool
- func (dc DocumentCollection) Map(mapper func(*interfaces.Document) *interfaces.Document) DocumentCollection
- func (dc DocumentCollection) SortByScore()
- func (dc DocumentCollection) Swap(i, j int)
- func (dc DocumentCollection) Top(n int) DocumentCollection
- type DocumentWithVector
- type Embedder
- type EmbedderFactory
- type EmbedderOption
- func WithAPIKey(apiKey string) EmbedderOption
- func WithBaseURL(baseURL string) EmbedderOption
- func WithCustomEmbedder(embedder Embedder) EmbedderOption
- func WithDimensions(dimensions int) EmbedderOption
- func WithHFEndpoint(endpoint string) EmbedderOption
- func WithInputType(inputType string) EmbedderOption
- func WithLocation(location string) EmbedderOption
- func WithModel(model string) EmbedderOption
- func WithProjectID(projectID string) EmbedderOption
- func WithProvider(provider EmbedderProvider) EmbedderOption
- type EmbedderOptions
- type EmbedderProvider
- type EnsembleRetriever
- type FusionStrategy
- type HuggingFaceEmbedder
- type HuggingFaceEmbedderConfig
- type HybridRetriever
- type InvertedIndex
- type KeywordAlgorithm
- type KeywordRetriever
- type LLMReranker
- type MMRReranker
- type MemoryVectorStore
- func (m *MemoryVectorStore) Add(ctx context.Context, docs []*interfaces.Document, vectors [][]float32) error
- func (m *MemoryVectorStore) AddDocuments(ctx context.Context, docs []*interfaces.Document) error
- func (m *MemoryVectorStore) Clear()
- func (m *MemoryVectorStore) Count() int
- func (m *MemoryVectorStore) Delete(ctx context.Context, ids []string) error
- func (m *MemoryVectorStore) Get(ctx context.Context, id string) (*interfaces.Document, error)
- func (m *MemoryVectorStore) GetEmbedding(ctx context.Context, text string) ([]float32, error)
- func (m *MemoryVectorStore) GetVector(ctx context.Context, id string) ([]float32, error)
- func (m *MemoryVectorStore) Search(ctx context.Context, query string, topK int) ([]*interfaces.Document, error)
- func (m *MemoryVectorStore) SearchByVector(ctx context.Context, queryVector []float32, topK int) ([]*interfaces.Document, error)
- func (m *MemoryVectorStore) SimilaritySearch(ctx context.Context, query string, topK int) ([]*interfaces.Document, error)
- func (m *MemoryVectorStore) SimilaritySearchWithScore(ctx context.Context, query string, topK int) ([]*interfaces.Document, error)
- func (m *MemoryVectorStore) Update(ctx context.Context, docs []*interfaces.Document) error
- type MemoryVectorStoreConfig
- type MockVectorStore
- func (m *MockVectorStore) AddDocuments(ctx context.Context, docs []*interfaces.Document) error
- func (m *MockVectorStore) Clear()
- func (m *MockVectorStore) Delete(ctx context.Context, ids []string) error
- func (m *MockVectorStore) GetAllDocuments() []*interfaces.Document
- func (m *MockVectorStore) LoadDocuments(docs []*interfaces.Document)
- func (m *MockVectorStore) SimilaritySearch(ctx context.Context, query string, topK int) ([]*interfaces.Document, error)
- func (m *MockVectorStore) SimilaritySearchWithScore(ctx context.Context, query string, topK int) ([]*interfaces.Document, error)
- type MultiQueryRetriever
- type OpenAIEmbedder
- type OpenAIEmbedderConfig
- type QdrantConfig
- type QdrantVectorStore
- func (q *QdrantVectorStore) Add(ctx context.Context, docs []*interfaces.Document, vectors [][]float32) error
- func (q *QdrantVectorStore) AddDocuments(ctx context.Context, docs []*interfaces.Document) error
- func (q *QdrantVectorStore) Close() error
- func (q *QdrantVectorStore) Delete(ctx context.Context, ids []string) error
- func (q *QdrantVectorStore) GetEmbedding(ctx context.Context, text string) ([]float32, error)
- func (q *QdrantVectorStore) Search(ctx context.Context, query string, topK int) ([]*interfaces.Document, error)
- func (q *QdrantVectorStore) SearchByVector(ctx context.Context, queryVector []float32, topK int) ([]*interfaces.Document, error)
- func (q *QdrantVectorStore) SimilaritySearch(ctx context.Context, query string, topK int) ([]*interfaces.Document, error)
- func (q *QdrantVectorStore) SimilaritySearchWithScore(ctx context.Context, query string, topK int) ([]*interfaces.Document, error)
- func (q *QdrantVectorStore) Update(ctx context.Context, docs []*interfaces.Document) error
- type QdrantVectorStoreOption
- type RAGChain
- type RAGMultiQueryRetriever
- type RAGRetriever
- func (r *RAGRetriever) AddDocuments(ctx context.Context, docs []*interfaces.Document) error
- func (r *RAGRetriever) Clear() error
- func (r *RAGRetriever) Retrieve(ctx context.Context, query string) ([]*interfaces.Document, error)
- func (r *RAGRetriever) RetrieveAndFormat(ctx context.Context, query string, template string) (string, error)
- func (r *RAGRetriever) RetrieveWithContext(ctx context.Context, query string) (string, error)
- func (r *RAGRetriever) SetScoreThreshold(threshold float32)
- func (r *RAGRetriever) SetTopK(topK int)
- type RAGRetrieverConfig
- type RankFusion
- type Reranker
- type RerankingRetriever
- type Retriever
- type RetrieverConfig
- type SearchType
- type SimpleEmbedder
- type VectorStoreRetriever
- func (v *VectorStoreRetriever) GetRelevantDocuments(ctx context.Context, query string) ([]*interfaces.Document, error)
- func (v *VectorStoreRetriever) WithSearchKwargs(kwargs map[string]interface{}) *VectorStoreRetriever
- func (v *VectorStoreRetriever) WithSearchType(searchType SearchType) *VectorStoreRetriever
- type VertexAIEmbedder
- type VertexAIEmbedderConfig
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func IsProviderSupported ¶ added in v0.6.0
func IsProviderSupported(provider EmbedderProvider) bool
IsProviderSupported 检查提供商是否支持(内置或已注册)
func NewDocument ¶
func NewDocument(content string, metadata map[string]interface{}) *interfaces.Document
NewDocument 创建新文档
func NewDocumentWithID ¶
func NewDocumentWithID(id, content string, metadata map[string]interface{}) *interfaces.Document
NewDocumentWithID 创建带 ID 的文档
func RegisterEmbedderProvider ¶ added in v0.6.0
func RegisterEmbedderProvider(provider EmbedderProvider, factory EmbedderFactory)
RegisterEmbedderProvider 注册自定义嵌入器提供商
允许用户注册自定义的嵌入器工厂函数,以支持内置提供商之外的服务
使用示例:
// 注册自定义提供商
retrieval.RegisterEmbedderProvider("my-provider", func(ctx context.Context, opts *retrieval.EmbedderOptions) (retrieval.Embedder, error) {
return NewMyCustomEmbedder(opts.APIKey, opts.Model)
})
// 使用自定义提供商
embedder, err := retrieval.NewEmbedder(ctx,
retrieval.WithProvider("my-provider"),
retrieval.WithAPIKey("xxx"),
)
func UnregisterEmbedderProvider ¶ added in v0.6.0
func UnregisterEmbedderProvider(provider EmbedderProvider)
UnregisterEmbedderProvider 注销自定义嵌入器提供商
Types ¶
type BaseEmbedder ¶
type BaseEmbedder struct {
// contains filtered or unexported fields
}
BaseEmbedder 基础嵌入器实现
type BaseReranker ¶
type BaseReranker struct {
Name string
}
BaseReranker 基础重排序器
func (*BaseReranker) Rerank ¶
func (b *BaseReranker) Rerank(ctx context.Context, query string, docs []*interfaces.Document) ([]*interfaces.Document, error)
Rerank 默认实现(不改变顺序)
type BaseRetriever ¶
type BaseRetriever struct {
*core.BaseRunnable[string, []*interfaces.Document]
// TopK 返回的最大文档数
TopK int
// MinScore 最小分数阈值(过滤低分文档)
MinScore float64
// Name 检索器名称(用于日志和追踪)
Name string
}
BaseRetriever 提供 Retriever 的基础实现
实现了 Runnable 接口的通用功能,子类只需实现 GetRelevantDocuments 方法
func (*BaseRetriever) Batch ¶
func (r *BaseRetriever) Batch(ctx context.Context, queries []string) ([][]*interfaces.Document, error)
Batch 批量执行
func (*BaseRetriever) FilterByScore ¶
func (r *BaseRetriever) FilterByScore(docs []*interfaces.Document) []*interfaces.Document
FilterByScore 按分数过滤文档
func (*BaseRetriever) GetRelevantDocuments ¶
func (r *BaseRetriever) GetRelevantDocuments(ctx context.Context, query string) ([]*interfaces.Document, error)
GetRelevantDocuments 基础实现(返回空列表)
子类应该重写此方法
func (*BaseRetriever) Invoke ¶
func (r *BaseRetriever) Invoke(ctx context.Context, query string) ([]*interfaces.Document, error)
Invoke 执行检索(实现 Runnable 接口)
func (*BaseRetriever) LimitTopK ¶
func (r *BaseRetriever) LimitTopK(docs []*interfaces.Document) []*interfaces.Document
LimitTopK 限制返回的文档数量
func (*BaseRetriever) Pipe ¶
func (r *BaseRetriever) Pipe(next core.Runnable[[]*interfaces.Document, any]) core.Runnable[string, any]
Pipe 连接到另一个 Runnable
func (*BaseRetriever) Stream ¶
func (r *BaseRetriever) Stream(ctx context.Context, query string) (<-chan core.StreamChunk[[]*interfaces.Document], error)
Stream 流式执行(默认实现)
func (*BaseRetriever) WithCallbacks ¶
func (r *BaseRetriever) WithCallbacks(callbacks ...core.Callback) core.Runnable[string, []*interfaces.Document]
WithCallbacks 添加回调
func (*BaseRetriever) WithConfig ¶
func (r *BaseRetriever) WithConfig(config core.RunnableConfig) core.Runnable[string, []*interfaces.Document]
WithConfig 配置 Retriever
type CohereEmbedder ¶ added in v0.6.0
type CohereEmbedder struct {
*BaseEmbedder
// contains filtered or unexported fields
}
CohereEmbedder 使用 Cohere 的嵌入模型
支持 embed-english-v3.0, embed-multilingual-v3.0 等模型
func NewCohereEmbedder ¶ added in v0.6.0
func NewCohereEmbedder(config CohereEmbedderConfig) (*CohereEmbedder, error)
NewCohereEmbedder 创建 Cohere 嵌入器
func (*CohereEmbedder) EmbedQuery ¶ added in v0.6.0
EmbedQuery 嵌入单个查询文本
type CohereEmbedderConfig ¶ added in v0.6.0
type CohereEmbedderConfig struct {
// APIKey Cohere API Key
APIKey string
// Model 模型名称,如 embed-english-v3.0
Model string
// InputType 输入类型:search_document, search_query, classification, clustering
InputType string
// BaseURL API 基础 URL(可选)
BaseURL string
// Dimensions 向量维度
Dimensions int
}
CohereEmbedderConfig Cohere 嵌入器配置
type CohereReranker ¶
type CohereReranker struct {
*BaseReranker
// APIKey Cohere API 密钥
APIKey string
// Model 模型名称
Model string
// TopN 返回前 N 个文档
TopN int
// contains filtered or unexported fields
}
CohereReranker Cohere Rerank API 重排序器
func NewCohereReranker ¶
func NewCohereReranker(apiKey, model string, topN int) (*CohereReranker, error)
NewCohereReranker 创建 Cohere 重排序器
参数:
- apiKey: Cohere API 密钥
- model: 模型名称(可选,默认为 "rerank-english-v2.0")
- topN: 返回前 N 个文档
返回:
- *CohereReranker: Cohere 重排序器实例
- error: 错误信息
func (*CohereReranker) Rerank ¶
func (c *CohereReranker) Rerank(ctx context.Context, query string, docs []*interfaces.Document) ([]*interfaces.Document, error)
Rerank 使用 Cohere API 重新排序
参数:
- ctx: 上下文
- query: 查询字符串
- docs: 待重排序的文档列表
返回:
- []*interfaces.Document: 重排序后的文档列表
- error: 错误信息
type CompareRankers ¶
type CompareRankers struct {
Rerankers []Reranker
}
CompareRankers 比较多个重排序器的性能
func (*CompareRankers) Compare ¶
func (c *CompareRankers) Compare(ctx context.Context, query string, docs []*interfaces.Document) (map[string][]*interfaces.Document, error)
Compare 对比重排序结果
type CrossEncoderReranker ¶
type CrossEncoderReranker struct {
*BaseReranker
// Model 模型名称
Model string
// TopN 返回前 N 个文档
TopN int
}
CrossEncoderReranker 交叉编码器重排序器
使用交叉编码器模型计算查询和文档的相关性分数
func NewCrossEncoderReranker ¶
func NewCrossEncoderReranker(model string, topN int) *CrossEncoderReranker
NewCrossEncoderReranker 创建交叉编码器重排序器
func (*CrossEncoderReranker) Rerank ¶
func (c *CrossEncoderReranker) Rerank(ctx context.Context, query string, docs []*interfaces.Document) ([]*interfaces.Document, error)
Rerank 重新排序文档
type DistanceMetric ¶
type DistanceMetric string
DistanceMetric 距离度量类型
const (
// DistanceMetricCosine 余弦相似度
DistanceMetricCosine DistanceMetric = "cosine"
// DistanceMetricEuclidean 欧氏距离
DistanceMetricEuclidean DistanceMetric = "euclidean"
// DistanceMetricDot 点积
DistanceMetricDot DistanceMetric = "dot"
)
type DocumentCollection ¶
type DocumentCollection []*interfaces.Document
DocumentCollection 文档集合
func (DocumentCollection) Deduplicate ¶
func (dc DocumentCollection) Deduplicate() DocumentCollection
Deduplicate 去重(基于 ID)
func (DocumentCollection) Filter ¶
func (dc DocumentCollection) Filter(predicate func(*interfaces.Document) bool) DocumentCollection
Filter 过滤文档
func (DocumentCollection) Map ¶
func (dc DocumentCollection) Map(mapper func(*interfaces.Document) *interfaces.Document) DocumentCollection
Map 映射文档
func (DocumentCollection) SortByScore ¶
func (dc DocumentCollection) SortByScore()
SortByScore 按分数排序
func (DocumentCollection) Top ¶
func (dc DocumentCollection) Top(n int) DocumentCollection
Top 获取前 N 个文档
type DocumentWithVector ¶
type DocumentWithVector struct {
Document *interfaces.Document
Vector []float32
}
DocumentWithVector 包含向量的文档
type Embedder ¶
type Embedder interface {
// Embed 批量嵌入文本
Embed(ctx context.Context, texts []string) ([][]float32, error)
// EmbedQuery 嵌入单个查询文本
EmbedQuery(ctx context.Context, query string) ([]float32, error)
// Dimensions 返回向量维度
Dimensions() int
}
Embedder 嵌入模型接口
将文本转换为向量表示,用于语义搜索和相似度计算
func MustNewEmbedder ¶ added in v0.6.0
func MustNewEmbedder(ctx context.Context, opts ...EmbedderOption) Embedder
MustNewEmbedder 创建嵌入器,失败时 panic
用于初始化时确定不会失败的场景,如测试代码
func NewEmbedder ¶ added in v0.6.0
func NewEmbedder(ctx context.Context, opts ...EmbedderOption) (Embedder, error)
NewEmbedder 创建嵌入器的工厂函数
使用示例:
// 创建 OpenAI 嵌入器
embedder, err := retrieval.NewEmbedder(ctx,
retrieval.WithProvider(retrieval.EmbedderProviderOpenAI),
retrieval.WithAPIKey("sk-xxx"),
retrieval.WithModel("text-embedding-3-small"),
)
// 创建 Vertex AI 嵌入器
embedder, err := retrieval.NewEmbedder(ctx,
retrieval.WithProvider(retrieval.EmbedderProviderVertexAI),
retrieval.WithProjectID("my-project"),
retrieval.WithLocation("us-central1"),
)
// 创建简单嵌入器(用于测试)
embedder, err := retrieval.NewEmbedder(ctx,
retrieval.WithProvider(retrieval.EmbedderProviderSimple),
retrieval.WithDimensions(768),
)
// 使用自定义嵌入器
embedder, err := retrieval.NewEmbedder(ctx,
retrieval.WithCustomEmbedder(myEmbedder),
)
// 使用注册的自定义提供商
retrieval.RegisterEmbedderProvider("my-provider", myFactory)
embedder, err := retrieval.NewEmbedder(ctx,
retrieval.WithProvider("my-provider"),
)
type EmbedderFactory ¶ added in v0.6.0
type EmbedderFactory func(ctx context.Context, options *EmbedderOptions) (Embedder, error)
EmbedderFactory 自定义嵌入器工厂函数类型
用于注册自定义的嵌入器提供商
type EmbedderOption ¶ added in v0.6.0
type EmbedderOption func(*EmbedderOptions)
EmbedderOption 嵌入器配置函数类型
func WithAPIKey ¶ added in v0.6.0
func WithAPIKey(apiKey string) EmbedderOption
WithAPIKey 设置 API Key
func WithBaseURL ¶ added in v0.6.0
func WithBaseURL(baseURL string) EmbedderOption
WithBaseURL 设置 API 基础 URL
func WithCustomEmbedder ¶ added in v0.6.0
func WithCustomEmbedder(embedder Embedder) EmbedderOption
WithCustomEmbedder 直接注入自定义嵌入器
当使用此选项时,Provider 会自动设置为 EmbedderProviderCustom
使用示例:
myEmbedder := NewMyCustomEmbedder()
embedder, err := retrieval.NewEmbedder(ctx,
retrieval.WithCustomEmbedder(myEmbedder),
)
func WithDimensions ¶ added in v0.6.0
func WithDimensions(dimensions int) EmbedderOption
WithDimensions 设置向量维度
func WithHFEndpoint ¶ added in v0.6.0
func WithHFEndpoint(endpoint string) EmbedderOption
WithHFEndpoint 设置 Hugging Face 推理端点
func WithInputType ¶ added in v0.6.0
func WithInputType(inputType string) EmbedderOption
WithInputType 设置输入类型(Cohere)
func WithLocation ¶ added in v0.6.0
func WithLocation(location string) EmbedderOption
WithLocation 设置区域(Vertex AI)
func WithProjectID ¶ added in v0.6.0
func WithProjectID(projectID string) EmbedderOption
WithProjectID 设置 Google Cloud 项目 ID(Vertex AI)
func WithProvider ¶ added in v0.6.0
func WithProvider(provider EmbedderProvider) EmbedderOption
WithProvider 设置服务提供商
type EmbedderOptions ¶ added in v0.6.0
type EmbedderOptions struct {
// 通用配置
Provider EmbedderProvider
APIKey string
BaseURL string
Model string
Dimensions int
// Vertex AI 特定配置
ProjectID string
Location string
// Cohere 特定配置
InputType string // search_document, search_query, classification, clustering
// Hugging Face 特定配置
HFEndpoint string // 自定义推理端点
// 自定义嵌入器(直接注入)
CustomEmbedder Embedder
}
EmbedderOptions 嵌入器配置选项
func DefaultEmbedderOptions ¶ added in v0.6.0
func DefaultEmbedderOptions() *EmbedderOptions
DefaultEmbedderOptions 返回默认配置
type EmbedderProvider ¶ added in v0.6.0
type EmbedderProvider string
EmbedderProvider 嵌入服务提供商类型
const (
// EmbedderProviderOpenAI OpenAI 嵌入服务
EmbedderProviderOpenAI EmbedderProvider = "openai"
// EmbedderProviderVertexAI Google Vertex AI 嵌入服务
EmbedderProviderVertexAI EmbedderProvider = "vertexai"
// EmbedderProviderCohere Cohere 嵌入服务
EmbedderProviderCohere EmbedderProvider = "cohere"
// EmbedderProviderHuggingFace Hugging Face 嵌入服务
EmbedderProviderHuggingFace EmbedderProvider = "huggingface"
// EmbedderProviderSimple 简单嵌入器(用于测试)
EmbedderProviderSimple EmbedderProvider = "simple"
// EmbedderProviderCustom 自定义嵌入器
EmbedderProviderCustom EmbedderProvider = "custom"
)
支持的嵌入服务提供商
func GetRegisteredProviders ¶ added in v0.6.0
func GetRegisteredProviders() []EmbedderProvider
GetRegisteredProviders 返回所有已注册的自定义提供商列表
func GetSupportedProviders ¶ added in v0.6.0
func GetSupportedProviders() []EmbedderProvider
GetSupportedProviders 返回所有内置支持的提供商列表
不包含通过 RegisterEmbedderProvider 注册的自定义提供商
type EnsembleRetriever ¶
type EnsembleRetriever struct {
*BaseRetriever
// Retrievers 检索器列表
Retrievers []Retriever
// Weights 每个检索器的权重
Weights []float64
}
EnsembleRetriever 集成检索器
组合多个检索器,使用加权融合策略
func NewEnsembleRetriever ¶
func NewEnsembleRetriever(retrievers []Retriever, weights []float64, config RetrieverConfig) *EnsembleRetriever
NewEnsembleRetriever 创建集成检索器
func (*EnsembleRetriever) AddRetriever ¶
func (e *EnsembleRetriever) AddRetriever(retriever Retriever, weight float64) *EnsembleRetriever
AddRetriever 添加检索器
func (*EnsembleRetriever) GetRelevantDocuments ¶
func (e *EnsembleRetriever) GetRelevantDocuments(ctx context.Context, query string) ([]*interfaces.Document, error)
GetRelevantDocuments 检索相关文档
func (*EnsembleRetriever) WithWeights ¶
func (e *EnsembleRetriever) WithWeights(weights []float64) *EnsembleRetriever
WithWeights 设置权重
type FusionStrategy ¶
type FusionStrategy string
FusionStrategy 融合策略
const (
// FusionStrategyWeightedSum 加权求和
FusionStrategyWeightedSum FusionStrategy = "weighted_sum"
// FusionStrategyRRF 倒数排名融合 (Reciprocal Rank Fusion)
FusionStrategyRRF FusionStrategy = "rrf"
// FusionStrategyCombSum 组合求和
FusionStrategyCombSum FusionStrategy = "comb_sum"
)
type HuggingFaceEmbedder ¶ added in v0.6.0
type HuggingFaceEmbedder struct {
*BaseEmbedder
// contains filtered or unexported fields
}
HuggingFaceEmbedder 使用 Hugging Face 的嵌入模型
支持 sentence-transformers 系列模型和自定义推理端点
func NewHuggingFaceEmbedder ¶ added in v0.6.0
func NewHuggingFaceEmbedder(config HuggingFaceEmbedderConfig) (*HuggingFaceEmbedder, error)
NewHuggingFaceEmbedder 创建 Hugging Face 嵌入器
func (*HuggingFaceEmbedder) EmbedQuery ¶ added in v0.6.0
EmbedQuery 嵌入单个查询文本
type HuggingFaceEmbedderConfig ¶ added in v0.6.0
type HuggingFaceEmbedderConfig struct {
// APIKey Hugging Face API Key
APIKey string
// Model 模型名称,如 sentence-transformers/all-MiniLM-L6-v2
Model string
// Endpoint 自定义推理端点(可选)
Endpoint string
// Dimensions 向量维度
Dimensions int
}
HuggingFaceEmbedderConfig Hugging Face 嵌入器配置
type HybridRetriever ¶
type HybridRetriever struct {
*BaseRetriever
// VectorRetriever 向量检索器
VectorRetriever Retriever
// KeywordRetriever 关键词检索器
KeywordRetriever Retriever
// VectorWeight 向量检索的权重(0-1)
VectorWeight float64
// KeywordWeight 关键词检索的权重(0-1)
KeywordWeight float64
// FusionStrategy 融合策略
FusionStrategy FusionStrategy
}
HybridRetriever 混合检索器
结合向量检索和关键词检索,使用加权融合策略
func NewHybridRetriever ¶
func NewHybridRetriever(vectorRetriever, keywordRetriever Retriever, vectorWeight, keywordWeight float64, config RetrieverConfig) *HybridRetriever
NewHybridRetriever 创建混合检索器
func (*HybridRetriever) GetRelevantDocuments ¶
func (h *HybridRetriever) GetRelevantDocuments(ctx context.Context, query string) ([]*interfaces.Document, error)
GetRelevantDocuments 检索相关文档
func (*HybridRetriever) WithFusionStrategy ¶
func (h *HybridRetriever) WithFusionStrategy(strategy FusionStrategy) *HybridRetriever
WithFusionStrategy 设置融合策略
func (*HybridRetriever) WithWeights ¶
func (h *HybridRetriever) WithWeights(vectorWeight, keywordWeight float64) *HybridRetriever
WithWeights 设置权重
type InvertedIndex ¶
type InvertedIndex struct {
// contains filtered or unexported fields
}
InvertedIndex 倒排索引
func (*InvertedIndex) AddDocument ¶
func (idx *InvertedIndex) AddDocument(docID int, terms []string)
AddDocument 添加文档到索引
func (*InvertedIndex) AverageDocLength ¶
func (idx *InvertedIndex) AverageDocLength() float64
AverageDocLength 获取平均文档长度
func (*InvertedIndex) DocumentFrequency ¶
func (idx *InvertedIndex) DocumentFrequency(term string) int
DocumentFrequency 获取词的文档频率(包含该词的文档数)
func (*InvertedIndex) TermFrequency ¶
func (idx *InvertedIndex) TermFrequency(docID int, term string) int
TermFrequency 获取词在文档中的频率
type KeywordAlgorithm ¶
type KeywordAlgorithm string
KeywordAlgorithm 关键词检索算法
const (
// AlgorithmBM25 BM25 算法
AlgorithmBM25 KeywordAlgorithm = "bm25"
// AlgorithmTFIDF TF-IDF 算法
AlgorithmTFIDF KeywordAlgorithm = "tfidf"
)
type KeywordRetriever ¶
type KeywordRetriever struct {
*BaseRetriever
// Documents 文档集合
Documents []*interfaces.Document
// Algorithm 检索算法
Algorithm KeywordAlgorithm
// Index 倒排索引
Index *InvertedIndex
}
KeywordRetriever 关键词检索器
使用 BM25 或 TF-IDF 算法进行基于关键词的检索
func NewKeywordRetriever ¶
func NewKeywordRetriever(docs []*interfaces.Document, config RetrieverConfig) *KeywordRetriever
NewKeywordRetriever 创建关键词检索器
func (*KeywordRetriever) GetRelevantDocuments ¶
func (k *KeywordRetriever) GetRelevantDocuments(ctx context.Context, query string) ([]*interfaces.Document, error)
GetRelevantDocuments 检索相关文档
func (*KeywordRetriever) WithAlgorithm ¶
func (k *KeywordRetriever) WithAlgorithm(algorithm KeywordAlgorithm) *KeywordRetriever
WithAlgorithm 设置检索算法
type LLMReranker ¶
type LLMReranker struct {
*BaseReranker
// TopN 返回前 N 个文档
TopN int
// Prompt 提示词模板
Prompt string
}
LLMReranker LLM 重排序器
使用 LLM 对文档进行相关性判断和排序
func (*LLMReranker) Rerank ¶
func (l *LLMReranker) Rerank(ctx context.Context, query string, docs []*interfaces.Document) ([]*interfaces.Document, error)
Rerank 重新排序文档
type MMRReranker ¶
type MMRReranker struct {
*BaseReranker
// Lambda 相关性和多样性的平衡参数(0-1)
// 0: 只考虑多样性,1: 只考虑相关性
Lambda float64
// TopN 返回前 N 个文档
TopN int
}
MMRReranker 最大边际相关性重排序器
使用 MMR 算法平衡相关性和多样性
func NewMMRReranker ¶
func NewMMRReranker(lambda float64, topN int) *MMRReranker
NewMMRReranker 创建 MMR 重排序器
func (*MMRReranker) Rerank ¶
func (m *MMRReranker) Rerank(ctx context.Context, query string, docs []*interfaces.Document) ([]*interfaces.Document, error)
Rerank 使用 MMR 算法重新排序
MMR = λ * Sim(D, Q) - (1-λ) * max(Sim(D, Di)),其中 Di 是已选择的文档
type MemoryVectorStore ¶
type MemoryVectorStore struct {
// contains filtered or unexported fields
}
MemoryVectorStore 内存向量存储实现
线程安全的内存向量存储,支持:
- 余弦相似度搜索
- 向量和文档的增删改查
- 自动向量化
func NewMemoryVectorStore ¶
func NewMemoryVectorStore(config MemoryVectorStoreConfig) *MemoryVectorStore
NewMemoryVectorStore 创建内存向量存储
func (*MemoryVectorStore) Add ¶
func (m *MemoryVectorStore) Add(ctx context.Context, docs []*interfaces.Document, vectors [][]float32) error
Add 添加文档和向量
func (*MemoryVectorStore) AddDocuments ¶
func (m *MemoryVectorStore) AddDocuments(ctx context.Context, docs []*interfaces.Document) error
AddDocuments 添加文档(实现 VectorStore 接口)
func (*MemoryVectorStore) Delete ¶
func (m *MemoryVectorStore) Delete(ctx context.Context, ids []string) error
Delete 删除文档
func (*MemoryVectorStore) Get ¶
func (m *MemoryVectorStore) Get(ctx context.Context, id string) (*interfaces.Document, error)
Get 获取文档
func (*MemoryVectorStore) GetEmbedding ¶
GetEmbedding 获取嵌入向量(实现扩展接口)
func (*MemoryVectorStore) Search ¶
func (m *MemoryVectorStore) Search(ctx context.Context, query string, topK int) ([]*interfaces.Document, error)
Search 相似度搜索
func (*MemoryVectorStore) SearchByVector ¶
func (m *MemoryVectorStore) SearchByVector(ctx context.Context, queryVector []float32, topK int) ([]*interfaces.Document, error)
SearchByVector 通过向量搜索
func (*MemoryVectorStore) SimilaritySearch ¶
func (m *MemoryVectorStore) SimilaritySearch(ctx context.Context, query string, topK int) ([]*interfaces.Document, error)
SimilaritySearch 相似度搜索(实现 VectorStore 接口)
func (*MemoryVectorStore) SimilaritySearchWithScore ¶
func (m *MemoryVectorStore) SimilaritySearchWithScore(ctx context.Context, query string, topK int) ([]*interfaces.Document, error)
SimilaritySearchWithScore 带分数的相似度搜索(实现 VectorStore 接口)
func (*MemoryVectorStore) Update ¶
func (m *MemoryVectorStore) Update(ctx context.Context, docs []*interfaces.Document) error
Update 更新文档
type MemoryVectorStoreConfig ¶
type MemoryVectorStoreConfig struct {
Embedder Embedder
DistanceMetric DistanceMetric
}
MemoryVectorStoreConfig 内存向量存储配置
type MockVectorStore ¶
type MockVectorStore struct {
// contains filtered or unexported fields
}
MockVectorStore 模拟向量存储(用于测试和示例)
func (*MockVectorStore) AddDocuments ¶
func (m *MockVectorStore) AddDocuments(ctx context.Context, docs []*interfaces.Document) error
AddDocuments 添加文档
func (*MockVectorStore) Delete ¶
func (m *MockVectorStore) Delete(ctx context.Context, ids []string) error
Delete 删除文档
func (*MockVectorStore) GetAllDocuments ¶
func (m *MockVectorStore) GetAllDocuments() []*interfaces.Document
GetAllDocuments 获取所有文档
func (*MockVectorStore) LoadDocuments ¶
func (m *MockVectorStore) LoadDocuments(docs []*interfaces.Document)
LoadDocuments 加载文档到向量存储
func (*MockVectorStore) SimilaritySearch ¶
func (m *MockVectorStore) SimilaritySearch(ctx context.Context, query string, topK int) ([]*interfaces.Document, error)
SimilaritySearch 相似度搜索(模拟实现)
func (*MockVectorStore) SimilaritySearchWithScore ¶
func (m *MockVectorStore) SimilaritySearchWithScore(ctx context.Context, query string, topK int) ([]*interfaces.Document, error)
SimilaritySearchWithScore 带分数的相似度搜索
type MultiQueryRetriever ¶
type MultiQueryRetriever struct {
*BaseRetriever
// Retriever 基础检索器
Retriever Retriever
// LLMClient LLM 客户端(用于生成查询变体)
LLMClient llm.Client
// NumQueries 生成的查询数量
NumQueries int
// QueryPrompt 查询生成提示词
QueryPrompt string
}
MultiQueryRetriever 多查询检索器
使用 LLM 生成多个查询变体,并对所有变体进行检索,最后合并结果
func NewMultiQueryRetriever ¶
func NewMultiQueryRetriever(baseRetriever Retriever, llmClient llm.Client, numQueries int, config RetrieverConfig) *MultiQueryRetriever
NewMultiQueryRetriever 创建多查询检索器
func (*MultiQueryRetriever) GetRelevantDocuments ¶
func (m *MultiQueryRetriever) GetRelevantDocuments(ctx context.Context, query string) ([]*interfaces.Document, error)
GetRelevantDocuments 检索相关文档
func (*MultiQueryRetriever) WithNumQueries ¶
func (m *MultiQueryRetriever) WithNumQueries(num int) *MultiQueryRetriever
WithNumQueries 设置生成的查询数量
func (*MultiQueryRetriever) WithQueryPrompt ¶
func (m *MultiQueryRetriever) WithQueryPrompt(prompt string) *MultiQueryRetriever
WithQueryPrompt 设置查询生成提示词
type OpenAIEmbedder ¶ added in v0.6.0
type OpenAIEmbedder struct {
*BaseEmbedder
// contains filtered or unexported fields
}
OpenAIEmbedder 使用 OpenAI 的嵌入模型
支持 text-embedding-3-small, text-embedding-3-large 等模型
func NewOpenAIEmbedder ¶ added in v0.6.0
func NewOpenAIEmbedder(config OpenAIEmbedderConfig) (*OpenAIEmbedder, error)
NewOpenAIEmbedder 创建 OpenAI 嵌入器
func (*OpenAIEmbedder) EmbedQuery ¶ added in v0.6.0
EmbedQuery 嵌入单个查询文本
type OpenAIEmbedderConfig ¶ added in v0.6.0
type OpenAIEmbedderConfig struct {
// APIKey OpenAI API Key
APIKey string
// Model 模型名称,如 text-embedding-3-small
Model string
// BaseURL API 基础 URL(可选,用于兼容其他 API)
BaseURL string
// Dimensions 向量维度
Dimensions int
}
OpenAIEmbedderConfig OpenAI 嵌入器配置
type QdrantConfig ¶
type QdrantConfig struct {
// URL Qdrant 服务地址
URL string
// APIKey API 密钥(如果需要)
APIKey string
// CollectionName 集合名称
CollectionName string
// VectorSize 向量维度
VectorSize int
// Distance 距离度量类型: cosine, euclidean, dot
Distance string
// Embedder 嵌入器(用于自动向量化)
Embedder Embedder
}
QdrantConfig Qdrant 配置
type QdrantVectorStore ¶
type QdrantVectorStore struct {
// contains filtered or unexported fields
}
QdrantVectorStore Qdrant 向量数据库存储
提供基于 Qdrant 的向量存储实现,支持高性能的语义搜索
func NewQdrantVectorStore ¶
func NewQdrantVectorStore(ctx context.Context, config QdrantConfig) (*QdrantVectorStore, error)
NewQdrantVectorStore 创建 Qdrant 向量存储
参数:
- config: Qdrant 配置
返回:
- *QdrantVectorStore: Qdrant 向量存储实例
- error: 错误信息
func (*QdrantVectorStore) Add ¶
func (q *QdrantVectorStore) Add(ctx context.Context, docs []*interfaces.Document, vectors [][]float32) error
Add 添加文档和向量
func (*QdrantVectorStore) AddDocuments ¶
func (q *QdrantVectorStore) AddDocuments(ctx context.Context, docs []*interfaces.Document) error
AddDocuments 添加文档(实现 VectorStore 接口)
func (*QdrantVectorStore) Delete ¶
func (q *QdrantVectorStore) Delete(ctx context.Context, ids []string) error
Delete 删除文档
func (*QdrantVectorStore) GetEmbedding ¶
GetEmbedding 获取嵌入向量
func (*QdrantVectorStore) Search ¶
func (q *QdrantVectorStore) Search(ctx context.Context, query string, topK int) ([]*interfaces.Document, error)
Search 相似度搜索
func (*QdrantVectorStore) SearchByVector ¶
func (q *QdrantVectorStore) SearchByVector(ctx context.Context, queryVector []float32, topK int) ([]*interfaces.Document, error)
SearchByVector 通过向量搜索
func (*QdrantVectorStore) SimilaritySearch ¶
func (q *QdrantVectorStore) SimilaritySearch(ctx context.Context, query string, topK int) ([]*interfaces.Document, error)
SimilaritySearch 相似度搜索(实现 VectorStore 接口)
func (*QdrantVectorStore) SimilaritySearchWithScore ¶
func (q *QdrantVectorStore) SimilaritySearchWithScore(ctx context.Context, query string, topK int) ([]*interfaces.Document, error)
SimilaritySearchWithScore 带分数的相似度搜索(实现 VectorStore 接口)
func (*QdrantVectorStore) Update ¶
func (q *QdrantVectorStore) Update(ctx context.Context, docs []*interfaces.Document) error
Update 更新文档
type QdrantVectorStoreOption ¶
type QdrantVectorStoreOption func(*QdrantConfig)
QdrantVectorStoreOption Qdrant 选项函数
func WithQdrantAPIKey ¶
func WithQdrantAPIKey(apiKey string) QdrantVectorStoreOption
WithQdrantAPIKey 设置 API 密钥
func WithQdrantDistance ¶
func WithQdrantDistance(distance string) QdrantVectorStoreOption
WithQdrantDistance 设置距离度量
func WithQdrantEmbedder ¶
func WithQdrantEmbedder(embedder Embedder) QdrantVectorStoreOption
WithQdrantEmbedder 设置嵌入器
type RAGChain ¶
type RAGChain struct {
// contains filtered or unexported fields
}
RAGChain RAG 链,组合检索和生成
func NewRAGChain ¶
func NewRAGChain(retriever *RAGRetriever, llmClient llm.Client) *RAGChain
NewRAGChain 创建 RAG 链
参数:
- retriever: RAG 检索器
- llmClient: LLM 客户端(可选,如果为 nil 则仅返回检索结果)
返回:
- *RAGChain: RAG 链实例
type RAGMultiQueryRetriever ¶
// RAGMultiQueryRetriever is a multi-query RAG retriever. It generates several
// related queries and merges their results to improve recall.
type RAGMultiQueryRetriever struct {
// BaseRetriever is the underlying RAG retriever.
BaseRetriever *RAGRetriever
// NumQueries is the number of queries to generate.
NumQueries int
LLMClient llm.Client // LLM client used to generate query variants
}
RAGMultiQueryRetriever RAG 多查询检索器
生成多个相关查询并合并结果,提高召回率
func NewRAGMultiQueryRetriever ¶
func NewRAGMultiQueryRetriever(baseRetriever *RAGRetriever, numQueries int, llmClient llm.Client) *RAGMultiQueryRetriever
NewRAGMultiQueryRetriever 创建 RAG 多查询检索器
参数:
- baseRetriever: 基础 RAG 检索器
- numQueries: 生成的查询数量
- llmClient: LLM 客户端(可选,如果为 nil 则只使用原始查询)
返回:
- *RAGMultiQueryRetriever: 多查询检索器实例
func (*RAGMultiQueryRetriever) Retrieve ¶
func (m *RAGMultiQueryRetriever) Retrieve(ctx context.Context, query string) ([]*interfaces.Document, error)
Retrieve 检索相关文档
使用 LLM 生成查询变体,然后对每个查询进行检索并合并结果
参数:
- ctx: 上下文
- query: 原始查询
返回:
- []*interfaces.Document: 合并后的文档列表
- error: 错误信息
type RAGRetriever ¶
// RAGRetriever is a RAG (Retrieval-Augmented Generation) retriever. It combines
// vector retrieval with a generation model to provide enhanced document retrieval.
type RAGRetriever struct {
// contains filtered or unexported fields
}
RAGRetriever RAG (Retrieval-Augmented Generation) 检索器
结合向量检索和生成模型,提供增强的文档检索能力
func NewRAGRetriever ¶
func NewRAGRetriever(config RAGRetrieverConfig) (*RAGRetriever, error)
NewRAGRetriever 创建 RAG 检索器
func (*RAGRetriever) AddDocuments ¶
func (r *RAGRetriever) AddDocuments(ctx context.Context, docs []*interfaces.Document) error
AddDocuments 添加文档到向量存储
func (*RAGRetriever) Retrieve ¶
func (r *RAGRetriever) Retrieve(ctx context.Context, query string) ([]*interfaces.Document, error)
Retrieve 检索相关文档
func (*RAGRetriever) RetrieveAndFormat ¶
func (r *RAGRetriever) RetrieveAndFormat(ctx context.Context, query string, template string) (string, error)
RetrieveAndFormat 检索并格式化为 Prompt
使用指定的模板格式化检索到的文档
func (*RAGRetriever) SetScoreThreshold ¶
func (r *RAGRetriever) SetScoreThreshold(threshold float32)
SetScoreThreshold 设置分数阈值
type RAGRetrieverConfig ¶
// RAGRetrieverConfig is the configuration for a RAG retriever; NewRAGRetriever
// takes it as its only argument.
type RAGRetrieverConfig struct {
VectorStore interfaces.VectorStore
Embedder Embedder
TopK int // NOTE(review): presumably the maximum number of documents returned — confirm
ScoreThreshold float32 // NOTE(review): presumably the same threshold SetScoreThreshold updates — confirm
IncludeMetadata bool
MaxContentLength int // NOTE(review): unit (bytes vs. runes) is not visible here — confirm
}
RAGRetrieverConfig RAG 检索器配置
type RankFusion ¶
// RankFusion performs rank fusion over multiple rankings.
type RankFusion struct {
// Method is the fusion method.
Method string // "rrf", "borda", "comb_sum"
// K is the RRF parameter.
K float64
}
RankFusion 排名融合
func (*RankFusion) Fuse ¶
func (rf *RankFusion) Fuse(rankings [][]*interfaces.Document) []*interfaces.Document
Fuse 融合多个排名结果
type Reranker ¶
// Reranker is the reranker interface. It reorders retrieved documents to
// improve result quality.
type Reranker interface {
// Rerank reorders the documents.
Rerank(ctx context.Context, query string, docs []*interfaces.Document) ([]*interfaces.Document, error)
}
Reranker 重排序器接口
对检索到的文档进行重新排序,提高结果质量
type RerankingRetriever ¶
// RerankingRetriever is a retriever with reranking: it applies a reranker on
// top of a base retriever.
type RerankingRetriever struct {
*BaseRetriever
// Retriever is the base retriever.
Retriever Retriever
// Reranker is the reranker.
Reranker Reranker
// FetchK is the number of documents fetched in the initial retrieval.
FetchK int
}
RerankingRetriever 带重排序的检索器
在基础检索器之上应用重排序
func NewRerankingRetriever ¶
func NewRerankingRetriever( baseRetriever Retriever, reranker Reranker, fetchK int, config RetrieverConfig, ) *RerankingRetriever
NewRerankingRetriever 创建带重排序的检索器
func (*RerankingRetriever) GetRelevantDocuments ¶
func (r *RerankingRetriever) GetRelevantDocuments(ctx context.Context, query string) ([]*interfaces.Document, error)
GetRelevantDocuments 检索并重排序文档
type Retriever ¶
// Retriever defines the retriever interface. It embeds
// core.Runnable[string, []*interfaces.Document].
type Retriever interface {
core.Runnable[string, []*interfaces.Document]
// GetRelevantDocuments retrieves the relevant documents.
//
// Parameters:
// - ctx: context
// - query: query string
//
// Returns:
// - []*interfaces.Document: list of relevant documents
// - error: error information
GetRelevantDocuments(ctx context.Context, query string) ([]*interfaces.Document, error)
}
Retriever 定义检索器接口
借鉴 LangChain 的 Retriever 设计,提供统一的文档检索接口。继承自 Runnable[string, []*interfaces.Document],支持管道操作和回调。
type RetrieverConfig ¶
// RetrieverConfig is the retriever configuration.
type RetrieverConfig struct {
TopK int // maximum number of documents to return
MinScore float64 // minimum score threshold
Name string // retriever name
}
RetrieverConfig 检索器配置
func DefaultRetrieverConfig ¶
func DefaultRetrieverConfig() RetrieverConfig
DefaultRetrieverConfig 返回默认配置
type SearchType ¶
// SearchType is the search type.
type SearchType string
SearchType 搜索类型
const ( // SearchTypeSimilarity 相似度搜索 SearchTypeSimilarity SearchType = "similarity" // SearchTypeSimilarityScoreThreshold 基于相似度阈值的搜索 SearchTypeSimilarityScoreThreshold SearchType = "similarity_score_threshold" // SearchTypeMMR 最大边际相关性搜索 SearchTypeMMR SearchType = "mmr" )
type SimpleEmbedder ¶
// SimpleEmbedder is a simple TF-IDF-style embedder (for testing and development).
// It uses a simple term-frequency vector representation and does not depend on
// an external model.
type SimpleEmbedder struct {
*BaseEmbedder
// contains filtered or unexported fields
}
SimpleEmbedder 简单的 TF-IDF 风格嵌入器(用于测试和开发)
使用简单的词频向量表示,不依赖外部模型
func NewSimpleEmbedder ¶
func NewSimpleEmbedder(dimensions int) *SimpleEmbedder
NewSimpleEmbedder 创建简单嵌入器
func (*SimpleEmbedder) EmbedQuery ¶
EmbedQuery 嵌入单个查询文本
type VectorStoreRetriever ¶
// VectorStoreRetriever is a vector-store retriever that retrieves documents by
// vector similarity.
type VectorStoreRetriever struct {
*BaseRetriever
// VectorStore is the vector store instance.
VectorStore interfaces.VectorStore
// SearchType is the search type.
SearchType SearchType
// SearchKwargs holds the search parameters.
SearchKwargs map[string]interface{}
}
VectorStoreRetriever 向量存储检索器
使用向量相似度进行文档检索
func NewVectorStoreRetriever ¶
func NewVectorStoreRetriever(vectorStore interfaces.VectorStore, config RetrieverConfig) *VectorStoreRetriever
NewVectorStoreRetriever 创建向量存储检索器
func (*VectorStoreRetriever) GetRelevantDocuments ¶
func (v *VectorStoreRetriever) GetRelevantDocuments(ctx context.Context, query string) ([]*interfaces.Document, error)
GetRelevantDocuments 检索相关文档
func (*VectorStoreRetriever) WithSearchKwargs ¶
func (v *VectorStoreRetriever) WithSearchKwargs(kwargs map[string]interface{}) *VectorStoreRetriever
WithSearchKwargs 设置搜索参数
func (*VectorStoreRetriever) WithSearchType ¶
func (v *VectorStoreRetriever) WithSearchType(searchType SearchType) *VectorStoreRetriever
WithSearchType 设置搜索类型
type VertexAIEmbedder ¶ added in v0.6.0
// VertexAIEmbedder uses Google Vertex AI embedding models. It supports models
// such as text-embedding-005 and provides high-quality semantic embeddings.
type VertexAIEmbedder struct {
*BaseEmbedder
// contains filtered or unexported fields
}
VertexAIEmbedder 使用 Google Vertex AI 的嵌入模型
支持 text-embedding-005 等模型,提供高质量的语义嵌入
func NewVertexAIEmbedder ¶ added in v0.6.0
func NewVertexAIEmbedder(ctx context.Context, config VertexAIEmbedderConfig) (*VertexAIEmbedder, error)
NewVertexAIEmbedder 创建 Vertex AI 嵌入器
func (*VertexAIEmbedder) Close ¶ added in v0.6.0
func (e *VertexAIEmbedder) Close() error
Close 关闭客户端
func (*VertexAIEmbedder) EmbedQuery ¶ added in v0.6.0
EmbedQuery 嵌入单个查询文本
type VertexAIEmbedderConfig ¶ added in v0.6.0
// VertexAIEmbedderConfig is the configuration for the Vertex AI embedder.
type VertexAIEmbedderConfig struct {
// ProjectID is the Google Cloud project ID.
ProjectID string
// Location is the region, e.g. us-central1.
Location string
// ModelID is the model ID, e.g. text-embedding-005.
ModelID string
// Dimensions is the vector dimensionality (text-embedding-005 defaults to 768).
Dimensions int
}
VertexAIEmbedderConfig Vertex AI 嵌入器配置