rag

package

v1.3.0 Latest Latest Go to latest Published: Feb 26, 2026 License: MIT Imports: 26 Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/BaSui01/agentflow

Links

Open Source Insights

Documentation ¶

Overview ¶

Config → RAG 桥接层。

提供工厂函数，将全局 config.Config 转换为 rag 包的运行时实例，消除 config 包和 rag 包之间的手动配置映射。

Index ¶

func EmbeddingSimilarity(a, b []float64) float64
func Float32ToFloat64(v []float32) []float64
func Float64ToFloat32(v []float64) []float32
func NewEmbeddingProviderFromConfig(cfg *config.Config, providerType EmbeddingProviderType) (embedding.Provider, error)
type ChainVisualization
type Chunk
type ChunkingConfig
- func DefaultChunkingConfig() ChunkingConfig
type ChunkingStrategy
type Clearable
type ContextProvider
type ContextualRetrieval
- func NewContextualRetrieval(retriever *HybridRetriever, contextProvider ContextProvider, ...) *ContextualRetrieval
- func (r *ContextualRetrieval) CleanExpiredCache() int
- func (r *ContextualRetrieval) IndexDocumentsWithContext(ctx context.Context, docs []Document) error
- func (r *ContextualRetrieval) Retrieve(ctx context.Context, query string, queryEmbedding []float64) ([]RetrievalResult, error)
- func (r *ContextualRetrieval) UpdateIDFStats(docs []Document)
type ContextualRetrievalConfig
- func DefaultContextualRetrievalConfig() ContextualRetrievalConfig
type CrossEncoderConfig
- func DefaultCrossEncoderConfig() CrossEncoderConfig
type CrossEncoderProvider
type CrossEncoderReranker
- func NewCrossEncoderReranker(provider CrossEncoderProvider, config CrossEncoderConfig, logger *zap.Logger) *CrossEncoderReranker
- func (r *CrossEncoderReranker) Rerank(ctx context.Context, query string, results []RetrievalResult) ([]RetrievalResult, error)
type DedupStats
type Document
type DocumentChunker
- func NewDocumentChunker(config ChunkingConfig, tokenizer Tokenizer, logger *zap.Logger) *DocumentChunker
- func (c *DocumentChunker) ChunkDocument(doc Document) []Chunk
type DocumentLister
type Edge
type EmbeddingProvider
type EmbeddingProviderType
type EnhancedRetriever
- func NewCohereRetriever(apiKey string, logger *zap.Logger) *EnhancedRetriever
- func NewEnhancedRetriever(cfg EnhancedRetrieverConfig, logger *zap.Logger) *EnhancedRetriever
- func NewJinaRetriever(apiKey string, logger *zap.Logger) *EnhancedRetriever
- func NewOpenAIRetriever(apiKey string, logger *zap.Logger) *EnhancedRetriever
- func NewRetrieverFromConfig(cfg *config.Config, opts ...RetrieverOption) (*EnhancedRetriever, error)
- func NewVoyageRetriever(apiKey string, logger *zap.Logger) *EnhancedRetriever
- func (r *EnhancedRetriever) IndexDocumentsWithEmbedding(ctx context.Context, docs []Document) error
- func (r *EnhancedRetriever) RetrieveWithProviders(ctx context.Context, query string) ([]RetrievalResult, error)
type EnhancedRetrieverConfig
type EnhancedTokenizer
- func (t *EnhancedTokenizer) CountTokens(text string) int
- func (t *EnhancedTokenizer) Encode(text string) []int
type Entity
type ExpansionResult
type GraphDocument
type GraphEmbedder
type GraphRAG
- func NewGraphRAG(graph *KnowledgeGraph, vectorStore LowLevelVectorStore, embedder GraphEmbedder, ...) *GraphRAG
- func (r *GraphRAG) AddDocument(ctx context.Context, doc GraphDocument) error
- func (r *GraphRAG) Retrieve(ctx context.Context, query string) ([]GraphRetrievalResult, error)
type GraphRAGConfig
- func DefaultGraphRAGConfig() GraphRAGConfig
type GraphRetrievalResult
type HNSWConfig
- func AdaptiveHNSWConfig(dataSize int) HNSWConfig
- func DefaultHNSWConfig() HNSWConfig
type HNSWIndex
- func NewHNSWIndex(config HNSWConfig, logger *zap.Logger) *HNSWIndex
- func (idx *HNSWIndex) Add(vector []float64, id string) error
- func (idx *HNSWIndex) Build(vectors [][]float64, ids []string) error
- func (idx *HNSWIndex) Delete(id string) error
- func (idx *HNSWIndex) Search(query []float64, k int) ([]SearchResult, error)
- func (idx *HNSWIndex) Size() int
type HopType
type HybridRetrievalConfig
- func DefaultHybridRetrievalConfig() HybridRetrievalConfig
type HybridRetriever
- func NewHybridRetriever(config HybridRetrievalConfig, logger *zap.Logger) *HybridRetriever
- func NewHybridRetrieverWithVectorStore(config HybridRetrievalConfig, vectorStore VectorStore, logger *zap.Logger) *HybridRetriever
- func (r *HybridRetriever) IndexDocuments(docs []Document) error
- func (r *HybridRetriever) Retrieve(ctx context.Context, query string, queryEmbedding []float64) ([]RetrievalResult, error)
type InMemoryVectorStore
- func NewInMemoryVectorStore(logger *zap.Logger) *InMemoryVectorStore
- func (s *InMemoryVectorStore) AddDocuments(ctx context.Context, docs []Document) error
- func (s *InMemoryVectorStore) ClearAll(ctx context.Context) error
- func (s *InMemoryVectorStore) Count(ctx context.Context) (int, error)
- func (s *InMemoryVectorStore) DeleteDocuments(ctx context.Context, ids []string) error
- func (s *InMemoryVectorStore) ListDocumentIDs(ctx context.Context, limit int, offset int) ([]string, error)
- func (s *InMemoryVectorStore) Search(ctx context.Context, queryEmbedding []float64, topK int) ([]VectorSearchResult, error)
- func (s *InMemoryVectorStore) UpdateDocument(ctx context.Context, doc Document) error
type IndexType
type KnowledgeGraph
- func NewKnowledgeGraph(logger *zap.Logger) *KnowledgeGraph
- func (g *KnowledgeGraph) AddEdge(edge *Edge)
- func (g *KnowledgeGraph) AddNode(node *Node)
- func (g *KnowledgeGraph) GetNeighbors(nodeID string, depth int) []*Node
- func (g *KnowledgeGraph) GetNode(id string) (*Node, bool)
- func (g *KnowledgeGraph) QueryByType(nodeType string) []*Node
type LLMContextProvider
- func NewLLMContextProvider(llmProvider func(context.Context, string) (string, error), logger *zap.Logger) *LLMContextProvider
- func (p *LLMContextProvider) GenerateContext(ctx context.Context, doc Document, chunk string) (string, error)
type LLMReranker
- func NewLLMReranker(provider LLMRerankerProvider, config LLMRerankerConfig, logger *zap.Logger) *LLMReranker
- func (r *LLMReranker) Rerank(ctx context.Context, query string, results []RetrievalResult) ([]RetrievalResult, error)
type LLMRerankerConfig
- func DefaultLLMRerankerConfig() LLMRerankerConfig
type LLMRerankerProvider
type LLMTokenizerAdapter
- func NewLLMTokenizerAdapter(inner lltok.Tokenizer, logger *zap.Logger) *LLMTokenizerAdapter
- func (a *LLMTokenizerAdapter) CountTokens(text string) int
- func (a *LLMTokenizerAdapter) Encode(text string) []int
type LowLevelSearchResult
type LowLevelVectorStore
type MilvusConfig
type MilvusIndexType
type MilvusMetricType
type MilvusStore
- func NewMilvusStore(cfg MilvusConfig, logger *zap.Logger) *MilvusStore
- func (s *MilvusStore) AddDocuments(ctx context.Context, docs []Document) error
- func (s *MilvusStore) ClearAll(ctx context.Context) error
- func (s *MilvusStore) Count(ctx context.Context) (int, error)
- func (s *MilvusStore) DeleteDocuments(ctx context.Context, ids []string) error
- func (s *MilvusStore) DropCollection(ctx context.Context) error
- func (s *MilvusStore) Flush(ctx context.Context) error
- func (s *MilvusStore) ListDocumentIDs(ctx context.Context, limit int, offset int) ([]string, error)
- func (s *MilvusStore) Search(ctx context.Context, queryEmbedding []float64, topK int) ([]VectorSearchResult, error)
- func (s *MilvusStore) UpdateDocument(ctx context.Context, doc Document) error
type MultiHopConfig
- func DefaultMultiHopConfig() MultiHopConfig
type MultiHopReasoner
- func NewMultiHopReasoner(config MultiHopConfig, retriever *HybridRetriever, ...) *MultiHopReasoner
- func (r *MultiHopReasoner) Reason(ctx context.Context, query string) (*ReasoningChain, error)
- func (r *MultiHopReasoner) ReasonBatch(ctx context.Context, queries []string) ([]*ReasoningChain, error)
type MultiStrategyDecision
type Node
type PineconeConfig
type PineconeStore
- func NewPineconeStore(cfg PineconeConfig, logger *zap.Logger) *PineconeStore
- func (s *PineconeStore) AddDocuments(ctx context.Context, docs []Document) error
- func (s *PineconeStore) ClearAll(ctx context.Context) error
- func (s *PineconeStore) Count(ctx context.Context) (int, error)
- func (s *PineconeStore) DeleteDocuments(ctx context.Context, ids []string) error
- func (s *PineconeStore) ListDocumentIDs(ctx context.Context, limit int, offset int) ([]string, error)
- func (s *PineconeStore) Search(ctx context.Context, queryEmbedding []float64, topK int) ([]VectorSearchResult, error)
- func (s *PineconeStore) UpdateDocument(ctx context.Context, doc Document) error
type QdrantConfig
type QdrantStore
- func NewQdrantStore(cfg QdrantConfig, logger *zap.Logger) *QdrantStore
- func (s *QdrantStore) AddDocuments(ctx context.Context, docs []Document) error
- func (s *QdrantStore) ClearAll(ctx context.Context) error
- func (s *QdrantStore) Count(ctx context.Context) (int, error)
- func (s *QdrantStore) DeleteDocuments(ctx context.Context, ids []string) error
- func (s *QdrantStore) ListDocumentIDs(ctx context.Context, limit int, offset int) ([]string, error)
- func (s *QdrantStore) Search(ctx context.Context, queryEmbedding []float64, topK int) ([]VectorSearchResult, error)
- func (s *QdrantStore) UpdateDocument(ctx context.Context, doc Document) error
type QueryDocPair
type QueryFeatures
type QueryIntent
type QueryLLMProvider
type QueryRouter
- func NewQueryRouter(config QueryRouterConfig, queryTransformer *QueryTransformer, ...) *QueryRouter
- func (r *QueryRouter) GetStrategyStats() map[RetrievalStrategy]StrategyStats
- func (r *QueryRouter) RecordFeedback(feedback RoutingFeedback)
- func (r *QueryRouter) Route(ctx context.Context, query string) (*RoutingDecision, error)
- func (r *QueryRouter) RouteBatch(ctx context.Context, queries []string) ([]*RoutingDecision, error)
- func (r *QueryRouter) RouteMulti(ctx context.Context, query string, maxStrategies int) (*MultiStrategyDecision, error)
type QueryRouterConfig
- func DefaultQueryRouterConfig() QueryRouterConfig
type QueryTransformConfig
- func DefaultQueryTransformConfig() QueryTransformConfig
type QueryTransformer
- func NewQueryTransformer(config QueryTransformConfig, llmProvider QueryLLMProvider, logger *zap.Logger) *QueryTransformer
- func (t *QueryTransformer) Expand(ctx context.Context, query string) ([]string, error)
- func (t *QueryTransformer) ExpandWithMetadata(ctx context.Context, query string) (*ExpansionResult, error)
- func (t *QueryTransformer) Transform(ctx context.Context, query string) (*TransformedQuery, error)
- func (t *QueryTransformer) TransformBatch(ctx context.Context, queries []string) ([]*TransformedQuery, error)
type ReasoningChain
- func (c *ReasoningChain) FromJSON(data []byte) error
- func (c *ReasoningChain) GetAllDocuments() []Document
- func (c *ReasoningChain) GetHop(hopNum int) *ReasoningHop
- func (c *ReasoningChain) GetTopDocuments(k int) []RetrievalResult
- func (c *ReasoningChain) ToJSON() ([]byte, error)
- func (c *ReasoningChain) Visualize() *ChainVisualization
type ReasoningHop
type ReasoningStatus
type RerankProvider
type RerankProviderType
type Reranker
type RerankerType
type RetrievalResult
type RetrievalStrategy
type RetrieverOption
- func WithEmbeddingType(t EmbeddingProviderType) RetrieverOption
- func WithLogger(l *zap.Logger) RetrieverOption
- func WithRerankType(t RerankProviderType) RetrieverOption
type RoutingCondition
type RoutingDecision
- func (d *RoutingDecision) FromJSON(data []byte) error
- func (d *RoutingDecision) ToJSON() ([]byte, error)
type RoutingFeedback
type SearchResult
type SemanticCache
- func NewSemanticCache(store VectorStore, config SemanticCacheConfig, logger *zap.Logger) *SemanticCache
- func (c *SemanticCache) Clear(ctx context.Context) error
- func (c *SemanticCache) Get(ctx context.Context, queryEmbedding []float64) (*Document, bool)
- func (c *SemanticCache) Set(ctx context.Context, doc Document) error
type SemanticCacheConfig
type SimpleContextProvider
- func NewSimpleContextProvider(logger *zap.Logger) *SimpleContextProvider
- func (p *SimpleContextProvider) GenerateContext(ctx context.Context, doc Document, chunk string) (string, error)
type SimpleGraphEmbedder
- func NewSimpleGraphEmbedder(config SimpleGraphEmbedderConfig, logger *zap.Logger) *SimpleGraphEmbedder
- func (e *SimpleGraphEmbedder) Embed(ctx context.Context, text string) ([]float64, error)
type SimpleGraphEmbedderConfig
type SimpleReranker
- func NewSimpleReranker(logger *zap.Logger) *SimpleReranker
- func (r *SimpleReranker) Rerank(ctx context.Context, query string, results []RetrievalResult) ([]RetrievalResult, error)
type SimpleTokenizer
- func (t *SimpleTokenizer) CountTokens(text string) int
- func (t *SimpleTokenizer) Encode(text string) []int
type StrategyConfig
type StrategyStats
type StrategyWithWeight
type StructuralBlock
type Tokenizer
- func NewEstimatorAdapter(model string, maxTokens int, logger *zap.Logger) Tokenizer
- func NewTiktokenAdapter(model string, logger *zap.Logger) (Tokenizer, error)
type TransformationType
type TransformedQuery
- func (tq *TransformedQuery) FromJSON(data []byte) error
- func (tq *TransformedQuery) ToJSON() ([]byte, error)
type Triple
type VectorIndex
type VectorSearchResult
type VectorStore
- func NewPineconeVectorStore(cfg PineconeConfig, logger *zap.Logger) VectorStore
- func NewVectorStoreFromConfig(cfg *config.Config, storeType VectorStoreType, logger *zap.Logger) (VectorStore, error)
type VectorStoreType
type VisualizationEdge
type VisualizationNode
type WeaviateConfig
type WeaviateStore
- func NewWeaviateStore(cfg WeaviateConfig, logger *zap.Logger) *WeaviateStore
- func (s *WeaviateStore) AddDocuments(ctx context.Context, docs []Document) error
- func (s *WeaviateStore) BM25Search(ctx context.Context, queryText string, topK int) ([]VectorSearchResult, error)
- func (s *WeaviateStore) ClearAll(ctx context.Context) error
- func (s *WeaviateStore) Count(ctx context.Context) (int, error)
- func (s *WeaviateStore) DeleteClass(ctx context.Context) error
- func (s *WeaviateStore) DeleteDocuments(ctx context.Context, ids []string) error
- func (s *WeaviateStore) GetSchema(ctx context.Context) (map[string]any, error)
- func (s *WeaviateStore) HybridSearch(ctx context.Context, queryText string, queryEmbedding []float64, topK int) ([]VectorSearchResult, error)
- func (s *WeaviateStore) ListDocumentIDs(ctx context.Context, limit int, offset int) ([]string, error)
- func (s *WeaviateStore) Search(ctx context.Context, queryEmbedding []float64, topK int) ([]VectorSearchResult, error)
- func (s *WeaviateStore) UpdateDocument(ctx context.Context, doc Document) error
type WebRetrievalResult
type WebRetriever
- func NewWebRetriever(config WebRetrieverConfig, localRetriever *HybridRetriever, ...) *WebRetriever
- func (wr *WebRetriever) Retrieve(ctx context.Context, query string, queryEmbedding []float64) ([]RetrievalResult, error)
type WebRetrieverConfig
- func DefaultWebRetrieverConfig() WebRetrieverConfig
type WebSearchFunc

Constants ¶

This section is empty.

Variables ¶

This section is empty.

Functions ¶

func EmbeddingSimilarity ¶

func EmbeddingSimilarity(a, b []float64) float64

EmbeddingSimilarity 计算两个 embedding 向量的余弦相似度

func Float32ToFloat64 ¶

func Float32ToFloat64(v []float32) []float64

Float32ToFloat64 converts a []float32 vector to []float64. Useful when integrating external systems that produce float32 embeddings with AgentFlow's float64-based VectorStore and LowLevelVectorStore interfaces.

func Float64ToFloat32 ¶

func Float64ToFloat32(v []float64) []float32

Float64ToFloat32 converts a []float64 vector to []float32. Useful when sending embeddings to external systems that require float32 precision.

func NewEmbeddingProviderFromConfig ¶

func NewEmbeddingProviderFromConfig(cfg *config.Config, providerType EmbeddingProviderType) (embedding.Provider, error)

NewEmbeddingProviderFromConfig 根据 LLM 配置创建 embedding.Provider。 providerType 指定嵌入提供者类型；为空时默认使用 "openai"。

Types ¶

type ChainVisualization ¶

type ChainVisualization struct {
	Nodes []VisualizationNode `json:"nodes"`
	Edges []VisualizationEdge `json:"edges"`
}

ChainVisualization 表示推理链的可视化数据（由节点 Nodes 和边 Edges 组成）。

type Chunk ¶

type Chunk struct {
	Content    string         `json:"content"`
	StartPos   int            `json:"start_pos"`
	EndPos     int            `json:"end_pos"`
	Metadata   map[string]any `json:"metadata"`
	TokenCount int            `json:"token_count"`
}

Chunk 文档块

type ChunkingConfig ¶

type ChunkingConfig struct {
	Strategy     ChunkingStrategy `json:"strategy"`       // 分块策略
	ChunkSize    int              `json:"chunk_size"`     // 块大小（tokens）
	ChunkOverlap int              `json:"chunk_overlap"`  // 重叠大小（tokens）
	MinChunkSize int              `json:"min_chunk_size"` // 最小块大小

	// 语义分块参数
	SimilarityThreshold float64 `json:"similarity_threshold"` // 语义相似度阈值

	// 文档感知参数
	PreserveTables     bool `json:"preserve_tables"`      // 保留表格
	PreserveCodeBlocks bool `json:"preserve_code_blocks"` // 保留代码块
	PreserveHeaders    bool `json:"preserve_headers"`     // 保留标题
}

ChunkingConfig 分块配置（基于 2025 最佳实践）

func DefaultChunkingConfig ¶

func DefaultChunkingConfig() ChunkingConfig

DefaultChunkingConfig 默认分块配置（生产级）

type ChunkingStrategy ¶

type ChunkingStrategy string

ChunkingStrategy 分块策略

const (
	ChunkingFixed     ChunkingStrategy = "fixed"     // 固定大小
	ChunkingRecursive ChunkingStrategy = "recursive" // 递归分块
	ChunkingSemantic  ChunkingStrategy = "semantic"  // 语义分块
	ChunkingDocument  ChunkingStrategy = "document"  // 文档感知
)

type Clearable ¶

type Clearable interface {
	ClearAll(ctx context.Context) error
}

Clearable is an optional interface for VectorStore implementations that support clearing all stored data. Use type assertion to check support:

if c, ok := store.(Clearable); ok { c.ClearAll(ctx) }

type ContextProvider ¶

type ContextProvider interface {
	// GenerateContext 为 chunk 生成上下文
	GenerateContext(ctx context.Context, doc Document, chunk string) (string, error)
}

ContextProvider 上下文提供器接口

type ContextualRetrieval ¶

type ContextualRetrieval struct {
	// contains filtered or unexported fields
}

ContextualRetrieval Anthropic 上下文检索（2025 最佳实践）为每个 chunk 添加文档级上下文，提高检索准确率 50-60%

func NewContextualRetrieval ¶

func NewContextualRetrieval(
	retriever *HybridRetriever,
	contextProvider ContextProvider,
	config ContextualRetrievalConfig,
	logger *zap.Logger,
) *ContextualRetrieval

NewContextualRetrieval 创建上下文检索器

func (*ContextualRetrieval) CleanExpiredCache ¶

func (r *ContextualRetrieval) CleanExpiredCache() int

CleanExpiredCache 清理过期缓存

func (*ContextualRetrieval) IndexDocumentsWithContext ¶

func (r *ContextualRetrieval) IndexDocumentsWithContext(ctx context.Context, docs []Document) error

IndexDocumentsWithContext 索引文档（添加上下文）

func (*ContextualRetrieval) Retrieve ¶

func (r *ContextualRetrieval) Retrieve(ctx context.Context, query string, queryEmbedding []float64) ([]RetrievalResult, error)

Retrieve 检索（使用上下文增强）

func (*ContextualRetrieval) UpdateIDFStats ¶

func (r *ContextualRetrieval) UpdateIDFStats(docs []Document)

UpdateIDFStats 更新 IDF 统计信息（在索引文档后调用）

type ContextualRetrievalConfig ¶

type ContextualRetrievalConfig struct {
	// 上下文生成
	UseContextPrefix bool   `json:"use_context_prefix"`
	ContextTemplate  string `json:"context_template"`
	MaxContextLength int    `json:"max_context_length"`

	// 检索增强
	UseReranking  bool    `json:"use_reranking"`
	ContextWeight float64 `json:"context_weight"`

	// 缓存
	CacheContexts bool          `json:"cache_contexts"`
	CacheTTL      time.Duration `json:"cache_ttl"` // 缓存过期时间，默认 1h

	// 分块配置（新增）
	ChunkSize     int  `json:"chunk_size"`      // 分块大小，默认 500
	ChunkOverlap  int  `json:"chunk_overlap"`   // 重叠大小，默认 50
	ChunkByTokens bool `json:"chunk_by_tokens"` // 按 token 还是字符分块

	// BM25 参数（新增）
	BM25K1 float64 `json:"bm25_k1"` // BM25 k1 参数，默认 1.2
	BM25B  float64 `json:"bm25_b"`  // BM25 b 参数，默认 0.75
}

ContextualRetrievalConfig 上下文检索配置

func DefaultContextualRetrievalConfig ¶

func DefaultContextualRetrievalConfig() ContextualRetrievalConfig

DefaultContextualRetrievalConfig 默认配置

type CrossEncoderConfig ¶

type CrossEncoderConfig struct {
	ModelName      string  `json:"model_name"`      // 模型名称
	MaxLength      int     `json:"max_length"`      // 最大输入长度
	BatchSize      int     `json:"batch_size"`      // 批处理大小
	ScoreWeight    float64 `json:"score_weight"`    // 重排序分数权重
	OriginalWeight float64 `json:"original_weight"` // 原始分数权重
}

CrossEncoderConfig Cross-Encoder 配置

func DefaultCrossEncoderConfig ¶

func DefaultCrossEncoderConfig() CrossEncoderConfig

DefaultCrossEncoderConfig 默认配置

type CrossEncoderProvider ¶

type CrossEncoderProvider interface {
	// Score 计算查询-文档对的相关性分数
	Score(ctx context.Context, pairs []QueryDocPair) ([]float64, error)
}

CrossEncoderProvider Cross-Encoder 提供器接口

type CrossEncoderReranker ¶

type CrossEncoderReranker struct {
	// contains filtered or unexported fields
}

CrossEncoderReranker Cross-Encoder 重排序器（生产级）基于 Sentence Transformers 的 Cross-Encoder 模型

func NewCrossEncoderReranker ¶

func NewCrossEncoderReranker(
	provider CrossEncoderProvider,
	config CrossEncoderConfig,
	logger *zap.Logger,
) *CrossEncoderReranker

NewCrossEncoderReranker 创建 Cross-Encoder 重排序器

func (*CrossEncoderReranker) Rerank ¶

func (r *CrossEncoderReranker) Rerank(ctx context.Context, query string, results []RetrievalResult) ([]RetrievalResult, error)

Rerank 重排序

type DedupStats ¶

type DedupStats struct {
	TotalRetrieved    int `json:"total_retrieved"`     // 原始检索结果数
	DedupByID         int `json:"dedup_by_id"`         // 按 ID 去重数量
	DedupBySimilarity int `json:"dedup_by_similarity"` // 按内容相似度去重数量
	FinalCount        int `json:"final_count"`         // 去重后最终数量
}

DedupStats 去重统计

type Document ¶

type Document struct {
	ID        string         `json:"id"`
	Content   string         `json:"content"`
	Metadata  map[string]any `json:"metadata,omitempty"`
	Embedding []float64      `json:"embedding,omitempty"`
}

Document 文档

type DocumentChunker ¶

type DocumentChunker struct {
	// contains filtered or unexported fields
}

DocumentChunker 文档分块器

func NewDocumentChunker ¶

func NewDocumentChunker(config ChunkingConfig, tokenizer Tokenizer, logger *zap.Logger) *DocumentChunker

NewDocumentChunker 创建文档分块器

func (*DocumentChunker) ChunkDocument ¶

func (c *DocumentChunker) ChunkDocument(doc Document) []Chunk

ChunkDocument 分块文档

type DocumentLister ¶

type DocumentLister interface {
	ListDocumentIDs(ctx context.Context, limit int, offset int) ([]string, error)
}

DocumentLister is an optional interface for VectorStore implementations that support listing document IDs with pagination. Use type assertion to check support:

if l, ok := store.(DocumentLister); ok { l.ListDocumentIDs(ctx, 100, 0) }

type Edge ¶

type Edge struct {
	ID         string         `json:"id"`
	Source     string         `json:"source"`
	Target     string         `json:"target"`
	Type       string         `json:"type"`
	Properties map[string]any `json:"properties,omitempty"`
	Weight     float64        `json:"weight"`
}

Edge 表示节点之间的关系。

type EmbeddingProvider ¶

type EmbeddingProvider interface {
	EmbedQuery(ctx context.Context, query string) ([]float64, error)
	EmbedDocuments(ctx context.Context, documents []string) ([][]float64, error)
	Name() string
}

EmbeddingProvider 封装嵌入提供者，供检索使用。

type EmbeddingProviderType ¶

type EmbeddingProviderType string

EmbeddingProviderType 标识要创建的嵌入提供者。

const (
	EmbeddingOpenAI EmbeddingProviderType = "openai"
	EmbeddingCohere EmbeddingProviderType = "cohere"
	EmbeddingVoyage EmbeddingProviderType = "voyage"
	EmbeddingJina   EmbeddingProviderType = "jina"
	EmbeddingGemini EmbeddingProviderType = "gemini"
)

type EnhancedRetriever ¶

type EnhancedRetriever struct {
	*HybridRetriever
	// contains filtered or unexported fields
}

EnhancedRetriever 扩展了 HybridRetriever，支持外部嵌入提供者和重排序提供者。

func NewCohereRetriever ¶

func NewCohereRetriever(apiKey string, logger *zap.Logger) *EnhancedRetriever

NewCohereRetriever 创建使用 Cohere 嵌入和重排的检索器。

func NewEnhancedRetriever ¶

func NewEnhancedRetriever(cfg EnhancedRetrieverConfig, logger *zap.Logger) *EnhancedRetriever

NewEnhancedRetriever 创建了外部提供者的检索器。

func NewJinaRetriever ¶

func NewJinaRetriever(apiKey string, logger *zap.Logger) *EnhancedRetriever

NewJinaRetriever 创建使用 Jina AI 嵌入和重排的检索器。

func NewOpenAIRetriever ¶

func NewOpenAIRetriever(apiKey string, logger *zap.Logger) *EnhancedRetriever

NewOpenAIRetriever 创建使用 OpenAI 嵌入的检索器。

func NewRetrieverFromConfig ¶

func NewRetrieverFromConfig(cfg *config.Config, opts ...RetrieverOption) (*EnhancedRetriever, error)

NewRetrieverFromConfig 一键创建完整的 EnhancedRetriever。它组装向量存储、嵌入提供者和可选的重排提供者。

func NewVoyageRetriever ¶

func NewVoyageRetriever(apiKey string, logger *zap.Logger) *EnhancedRetriever

NewVoyageRetriever 创建使用 Voyage AI 嵌入和重排的检索器。

func (*EnhancedRetriever) IndexDocumentsWithEmbedding ¶

func (r *EnhancedRetriever) IndexDocumentsWithEmbedding(ctx context.Context, docs []Document) error

IndexDocumentsWithEmbedding 为文档生成嵌入并建立索引。

func (*EnhancedRetriever) RetrieveWithProviders ¶

func (r *EnhancedRetriever) RetrieveWithProviders(ctx context.Context, query string) ([]RetrievalResult, error)

RetrieveWithProviders 使用外部提供者进行检索。

type EnhancedRetrieverConfig ¶

type EnhancedRetrieverConfig struct {
	HybridConfig      HybridRetrievalConfig
	EmbeddingProvider EmbeddingProvider
	RerankProvider    RerankProvider
}

EnhancedRetrieverConfig 是 EnhancedRetriever 的配置。

type EnhancedTokenizer ¶

type EnhancedTokenizer struct{}

EnhancedTokenizer is a CJK-aware token estimator that improves on SimpleTokenizer without requiring external encoding data (no network, no CGO).

Heuristics:

- CJK characters: ~1.5 characters per token (Chinese/Japanese/Korean ideographs are typically encoded as 1-2 tokens each in BPE-based tokenizers)
- ASCII/Latin text: ~4 characters per token (consistent with GPT-family BPE)
- Whitespace-delimited words shorter than 3 chars count as 1 token each

For production accuracy, prefer NewTiktokenAdapter which uses real BPE encoding.

func (*EnhancedTokenizer) CountTokens ¶

func (t *EnhancedTokenizer) CountTokens(text string) int

func (*EnhancedTokenizer) Encode ¶

func (t *EnhancedTokenizer) Encode(text string) []int

type Entity ¶

type Entity struct {
	ID   string `json:"id"`
	Name string `json:"name"`
	Type string `json:"type"`
}

Entity 表示提取出的实体。

type ExpansionResult ¶

type ExpansionResult struct {
	Original   string      `json:"original"`
	Expansions []string    `json:"expansions"`
	Keywords   []string    `json:"keywords"`
	Intent     QueryIntent `json:"intent"`
}

ExpansionResult 包含带有元数据的扩展查询结果。

type GraphDocument ¶

type GraphDocument struct {
	ID       string         `json:"id"`
	Title    string         `json:"title"`
	Content  string         `json:"content"`
	Metadata map[string]any `json:"metadata,omitempty"`
	Entities []Entity       `json:"entities,omitempty"`
}

GraphDocument是用于索引的文档。

type GraphEmbedder ¶

type GraphEmbedder interface {
	Embed(ctx context.Context, text string) ([]float64, error)
}

GraphEmbedder 生成嵌入向量。

type GraphRAG ¶

type GraphRAG struct {
	// contains filtered or unexported fields
}

GraphRAG 结合了知识图谱和向量检索。

func NewGraphRAG ¶

func NewGraphRAG(graph *KnowledgeGraph, vectorStore LowLevelVectorStore, embedder GraphEmbedder, config GraphRAGConfig, logger *zap.Logger) *GraphRAG

NewGraphRAG创建了一个新的GraphRAG实例.

func (*GraphRAG) AddDocument ¶

func (r *GraphRAG) AddDocument(ctx context.Context, doc GraphDocument) error

AddDocument 将文档添加到知识图谱和向量存储中。

func (*GraphRAG) Retrieve ¶

func (r *GraphRAG) Retrieve(ctx context.Context, query string) ([]GraphRetrievalResult, error)

Retrieve 执行混合检索。

type GraphRAGConfig ¶

type GraphRAGConfig struct {
	GraphWeight   float64 `json:"graph_weight"`    // Weight for graph results
	VectorWeight  float64 `json:"vector_weight"`   // Weight for vector results
	MaxGraphDepth int     `json:"max_graph_depth"` // Max traversal depth
	MaxResults    int     `json:"max_results"`
	MinScore      float64 `json:"min_score"`
}

GraphRAGConfig 配置了 GraphRAG.

func DefaultGraphRAGConfig ¶

func DefaultGraphRAGConfig() GraphRAGConfig

DefaultGraphRAGConfig 返回默认配置。

type GraphRetrievalResult ¶

type GraphRetrievalResult struct {
	ID           string         `json:"id"`
	Content      string         `json:"content"`
	Score        float64        `json:"score"`
	GraphScore   float64        `json:"graph_score"`
	VectorScore  float64        `json:"vector_score"`
	Source       string         `json:"source"` // "graph", "vector", "hybrid"
	Metadata     map[string]any `json:"metadata,omitempty"`
	RelatedNodes []*Node        `json:"related_nodes,omitempty"`
}

GraphRetrievalResult 表示混合检索结果。

type HNSWConfig ¶

type HNSWConfig struct {
	M              int     `json:"m"`               // 每层最大连接数（12-48）
	EfConstruction int     `json:"ef_construction"` // 构建时搜索宽度（100-200）
	EfSearch       int     `json:"ef_search"`       // 搜索时宽度（50-200）
	MaxLevel       int     `json:"max_level"`       // 最大层数
	Ml             float64 `json:"ml"`              // 层数归一化因子
}

HNSWConfig HNSW 配置

func AdaptiveHNSWConfig ¶

func AdaptiveHNSWConfig(dataSize int) HNSWConfig

AdaptiveHNSWConfig 自适应 HNSW 配置（根据数据规模动态调整）基于 2025 最佳实践：小数据集用小 M，大数据集用大 M

func DefaultHNSWConfig ¶

func DefaultHNSWConfig() HNSWConfig

DefaultHNSWConfig 默认 HNSW 配置（生产级）

type HNSWIndex ¶

type HNSWIndex struct {
	// contains filtered or unexported fields
}

HNSWIndex HNSW 索引（Hierarchical Navigable Small World）

func NewHNSWIndex ¶

func NewHNSWIndex(config HNSWConfig, logger *zap.Logger) *HNSWIndex

NewHNSWIndex 创建 HNSW 索引

func (*HNSWIndex) Add ¶

func (idx *HNSWIndex) Add(vector []float64, id string) error

Add 添加向量

func (*HNSWIndex) Build ¶

func (idx *HNSWIndex) Build(vectors [][]float64, ids []string) error

Build 构建 HNSW 索引

func (*HNSWIndex) Delete ¶

func (idx *HNSWIndex) Delete(id string) error

Delete 删除向量

func (*HNSWIndex) Search ¶

func (idx *HNSWIndex) Search(query []float64, k int) ([]SearchResult, error)

Search 搜索最近邻

func (*HNSWIndex) Size ¶

func (idx *HNSWIndex) Size() int

Size 索引大小

type HopType ¶

type HopType string

HopType 代表推理跳的类型

const (
	HopTypeInitial      HopType = "initial"      // Initial query retrieval
	HopTypeFollowUp     HopType = "follow_up"    // Follow-up based on previous results
	HopTypeDecomposed   HopType = "decomposed"   // Sub-query from decomposition
	HopTypeRefinement   HopType = "refinement"   // Query refinement based on context
	HopTypeVerification HopType = "verification" // Verify or cross-check information
	HopTypeBridging     HopType = "bridging"     // Bridge between concepts
)

type HybridRetrievalConfig ¶

type HybridRetrievalConfig struct {
	// BM25 配置
	UseBM25    bool    `json:"use_bm25"`
	BM25Weight float64 `json:"bm25_weight"`
	BM25K1     float64 `json:"bm25_k1"` // BM25 参数 k1 (1.2-2.0)
	BM25B      float64 `json:"bm25_b"`  // BM25 参数 b (0.75)

	// 向量检索配置
	UseVector    bool    `json:"use_vector"`
	VectorWeight float64 `json:"vector_weight"`

	// Reranking 配置
	UseReranking bool `json:"use_reranking"`
	RerankTopK   int  `json:"rerank_top_k"`

	// 检索参数
	TopK     int     `json:"top_k"`
	MinScore float64 `json:"min_score"`
}

HybridRetrievalConfig 混合检索配置（基于 2025 年最佳实践）

func DefaultHybridRetrievalConfig ¶

func DefaultHybridRetrievalConfig() HybridRetrievalConfig

DefaultHybridRetrievalConfig 返回默认混合检索配置

type HybridRetriever ¶

type HybridRetriever struct {
	// contains filtered or unexported fields
}

HybridRetriever 混合检索器

func NewHybridRetriever ¶

func NewHybridRetriever(config HybridRetrievalConfig, logger *zap.Logger) *HybridRetriever

NewHybridRetriever 创建混合检索器

func NewHybridRetrieverWithVectorStore ¶

func NewHybridRetrieverWithVectorStore(
	config HybridRetrievalConfig,
	vectorStore VectorStore,
	logger *zap.Logger,
) *HybridRetriever

NewHybridRetrieverWithVectorStore 创建带向量存储的混合检索器

func (*HybridRetriever) IndexDocuments ¶

func (r *HybridRetriever) IndexDocuments(docs []Document) error

IndexDocuments 索引文档

func (*HybridRetriever) Retrieve ¶

func (r *HybridRetriever) Retrieve(ctx context.Context, query string, queryEmbedding []float64) ([]RetrievalResult, error)

Retrieve 混合检索

type InMemoryVectorStore ¶

type InMemoryVectorStore struct {
	// contains filtered or unexported fields
}

InMemoryVectorStore 内存向量存储

func NewInMemoryVectorStore ¶

func NewInMemoryVectorStore(logger *zap.Logger) *InMemoryVectorStore

NewInMemoryVectorStore 创建内存向量存储

func (*InMemoryVectorStore) AddDocuments ¶

func (s *InMemoryVectorStore) AddDocuments(ctx context.Context, docs []Document) error

AddDocuments 添加文档

func (*InMemoryVectorStore) ClearAll ¶

func (s *InMemoryVectorStore) ClearAll(ctx context.Context) error

ClearAll removes all documents from the in-memory store.

func (*InMemoryVectorStore) Count ¶

func (s *InMemoryVectorStore) Count(ctx context.Context) (int, error)

Count returns the number of documents.

func (*InMemoryVectorStore) DeleteDocuments ¶

func (s *InMemoryVectorStore) DeleteDocuments(ctx context.Context, ids []string) error

DeleteDocuments 删除文档

func (*InMemoryVectorStore) ListDocumentIDs ¶

func (s *InMemoryVectorStore) ListDocumentIDs(ctx context.Context, limit int, offset int) ([]string, error)

ListDocumentIDs returns a paginated list of document IDs.

func (*InMemoryVectorStore) Search ¶

func (s *InMemoryVectorStore) Search(ctx context.Context, queryEmbedding []float64, topK int) ([]VectorSearchResult, error)

Search 搜索相似文档

func (*InMemoryVectorStore) UpdateDocument ¶

func (s *InMemoryVectorStore) UpdateDocument(ctx context.Context, doc Document) error

UpdateDocument 更新文档

type IndexType ¶

type IndexType string

IndexType 索引类型

const (
	IndexFlat IndexType = "flat" // brute-force search
	IndexHNSW IndexType = "hnsw" // HNSW graph index
	IndexIVF  IndexType = "ivf"  // IVF clustering index
)

type KnowledgeGraph ¶

type KnowledgeGraph struct {
	// contains filtered or unexported fields
}

KnowledgeGraph provides in-memory knowledge graph operations.

func NewKnowledgeGraph ¶

func NewKnowledgeGraph(logger *zap.Logger) *KnowledgeGraph

NewKnowledgeGraph creates a new knowledge graph.

func (*KnowledgeGraph) AddEdge ¶

func (g *KnowledgeGraph) AddEdge(edge *Edge)

AddEdge adds an edge to the graph.

func (*KnowledgeGraph) AddNode ¶

func (g *KnowledgeGraph) AddNode(node *Node)

AddNode adds a node to the graph.

func (*KnowledgeGraph) GetNeighbors ¶

func (g *KnowledgeGraph) GetNeighbors(nodeID string, depth int) []*Node

GetNeighbors returns the neighboring nodes.

func (*KnowledgeGraph) GetNode ¶

func (g *KnowledgeGraph) GetNode(id string) (*Node, bool)

GetNode retrieves a node by ID.

func (*KnowledgeGraph) QueryByType ¶

func (g *KnowledgeGraph) QueryByType(nodeType string) []*Node

QueryByType returns the nodes of a specific type.

type LLMContextProvider ¶

type LLMContextProvider struct {
	// contains filtered or unexported fields
}

LLMContextProvider 基于 LLM 的上下文生成器

func NewLLMContextProvider ¶

func NewLLMContextProvider(
	llmProvider func(context.Context, string) (string, error),
	logger *zap.Logger,
) *LLMContextProvider

NewLLMContextProvider 创建 LLM 上下文提供器

func (*LLMContextProvider) GenerateContext ¶

func (p *LLMContextProvider) GenerateContext(ctx context.Context, doc Document, chunk string) (string, error)

GenerateContext 生成上下文

type LLMReranker ¶

type LLMReranker struct {
	// contains filtered or unexported fields
}

LLMReranker LLM 重排序器（使用 LLM 判断相关性）

func NewLLMReranker ¶

func NewLLMReranker(
	provider LLMRerankerProvider,
	config LLMRerankerConfig,
	logger *zap.Logger,
) *LLMReranker

NewLLMReranker 创建 LLM 重排序器

func (*LLMReranker) Rerank ¶

func (r *LLMReranker) Rerank(ctx context.Context, query string, results []RetrievalResult) ([]RetrievalResult, error)

Rerank 重排序

type LLMRerankerConfig ¶

// LLMRerankerConfig is the configuration for LLM-based reranking.
type LLMRerankerConfig struct {
	MaxCandidates  int     `json:"max_candidates"`  // maximum number of candidates
	Temperature    float64 `json:"temperature"`     // temperature
	PromptTemplate string  `json:"prompt_template"` // prompt template
}

LLMRerankerConfig LLM 重排序配置

func DefaultLLMRerankerConfig ¶

func DefaultLLMRerankerConfig() LLMRerankerConfig

DefaultLLMRerankerConfig 默认配置

type LLMRerankerProvider ¶

// LLMRerankerProvider is the provider used for LLM-based reranking.
type LLMRerankerProvider interface {
	// ScoreRelevance evaluates relevance for the given query and document.
	ScoreRelevance(ctx context.Context, query, document string) (float64, error)
}

LLMRerankerProvider LLM 重排序提供器

type LLMTokenizerAdapter ¶

type LLMTokenizerAdapter struct {
	// contains filtered or unexported fields
}

LLMTokenizerAdapter 将 llm/tokenizer.Tokenizer 适配为 rag.Tokenizer 接口。当底层 tokenizer 返回 error 时，回退到字符估算并记录警告日志。

func NewLLMTokenizerAdapter ¶

func NewLLMTokenizerAdapter(inner lltok.Tokenizer, logger *zap.Logger) *LLMTokenizerAdapter

NewLLMTokenizerAdapter 创建适配器。

func (*LLMTokenizerAdapter) CountTokens ¶

func (a *LLMTokenizerAdapter) CountTokens(text string) int

CountTokens 返回文本的 token 数。底层 tokenizer 出错时回退到 len(text)/4 估算。

func (*LLMTokenizerAdapter) Encode ¶

func (a *LLMTokenizerAdapter) Encode(text string) []int

Encode 将文本转换为 token ID 列表。底层 tokenizer 出错时回退到伪 token ID 序列。

type LowLevelSearchResult ¶ added in v1.0.0

type LowLevelSearchResult struct {
	ID       string         `json:"id"`
	Score    float64        `json:"score"`
	Metadata map[string]any `json:"metadata"`
}

LowLevelSearchResult is the search result for LowLevelVectorStore.

type LowLevelVectorStore ¶ added in v1.0.0

type LowLevelVectorStore interface {
	// Store stores a vector with its ID and metadata.
	Store(ctx context.Context, id string, vector []float64, metadata map[string]any) error

	// Search finds the top-K most similar vectors, optionally filtered by metadata.
	Search(ctx context.Context, query []float64, topK int, filter map[string]any) ([]LowLevelSearchResult, error)

	// Delete removes a vector by ID.
	Delete(ctx context.Context, id string) error
}

LowLevelVectorStore is the low-level vector storage interface for raw vectors with metadata. Used by memory systems and graph RAG. For document-level operations, use VectorStore.

type MilvusConfig ¶

// MilvusConfig configures the Milvus VectorStore implementation.
type MilvusConfig struct {
	// Connection settings
	Host    string `json:"host"`
	Port    int    `json:"port"`
	BaseURL string `json:"base_url,omitempty"` // Override host:port if set

	// Authentication
	Username string `json:"username,omitempty"`
	Password string `json:"password,omitempty"`
	Token    string `json:"token,omitempty"` // For Zilliz Cloud

	// Collection settings
	Collection string `json:"collection"`
	Database   string `json:"database,omitempty"` // Default: "default"

	// Schema settings
	VectorDimension int    `json:"vector_dimension,omitempty"` // Required for auto-create
	PrimaryField    string `json:"primary_field,omitempty"`    // Default: "id"
	VectorField     string `json:"vector_field,omitempty"`     // Default: "vector"
	ContentField    string `json:"content_field,omitempty"`    // Default: "content"
	MetadataField   string `json:"metadata_field,omitempty"`   // Default: "metadata"

	// Index settings
	IndexType   MilvusIndexType  `json:"index_type,omitempty"`   // Default: IVF_FLAT
	MetricType  MilvusMetricType `json:"metric_type,omitempty"`  // Default: COSINE
	IndexParams map[string]any   `json:"index_params,omitempty"` // Index-specific params

	// Search settings
	SearchParams map[string]any `json:"search_params,omitempty"` // Search-specific params

	// Behavior settings
	AutoCreateCollection bool          `json:"auto_create_collection,omitempty"`
	Timeout              time.Duration `json:"timeout,omitempty"`
	BatchSize            int           `json:"batch_size,omitempty"` // For batch operations

	// Consistency level. NOTE(review): the original list of accepted values is
	// garbled; presumably Milvus's levels (Strong, Session, Bounded,
	// Eventually) — confirm against the source.
	ConsistencyLevel string `json:"consistency_level,omitempty"`
}

MilvusConfig configures the Milvus VectorStore implementation.

type MilvusIndexType ¶

type MilvusIndexType string

MilvusIndexType defines the index type used for Milvus vector search.

const (
	// MilvusIndexIVFFlat is the IVF_FLAT index type (good balance of speed and accuracy).
	MilvusIndexIVFFlat MilvusIndexType = "IVF_FLAT"
	// MilvusIndexHNSW is the HNSW index type (high accuracy, more memory).
	MilvusIndexHNSW MilvusIndexType = "HNSW"
	// MilvusIndexFlat is the FLAT index type (brute force, highest accuracy).
	MilvusIndexFlat MilvusIndexType = "FLAT"
	// MilvusIndexIVFSQ8 is the IVF_SQ8 index type (compressed, faster but less accurate).
	MilvusIndexIVFSQ8 MilvusIndexType = "IVF_SQ8"
	// MilvusIndexIVFPQ is the IVF_PQ index type (highly compressed, fastest but least accurate).
	MilvusIndexIVFPQ MilvusIndexType = "IVF_PQ"
)

type MilvusMetricType ¶

type MilvusMetricType string

MilvusMetricType defines the distance metric used for Milvus vector search.

const (
	// MilvusMetricL2 is the Euclidean distance metric.
	MilvusMetricL2 MilvusMetricType = "L2"
	// MilvusMetricIP is the inner product metric (matches cosine similarity
	// when vectors are normalized).
	MilvusMetricIP MilvusMetricType = "IP"
	// MilvusMetricCosine is the cosine similarity metric.
	MilvusMetricCosine MilvusMetricType = "COSINE"
)

type MilvusStore ¶

type MilvusStore struct {
	// contains filtered or unexported fields
}

MilvusStore implements VectorStore using the Milvus REST API (v2).

func NewMilvusStore ¶

func NewMilvusStore(cfg MilvusConfig, logger *zap.Logger) *MilvusStore

NewMilvusStore creates a Milvus-backed VectorStore.

func (*MilvusStore) AddDocuments ¶

func (s *MilvusStore) AddDocuments(ctx context.Context, docs []Document) error

AddDocuments adds documents to the Milvus collection.

func (*MilvusStore) ClearAll ¶

func (s *MilvusStore) ClearAll(ctx context.Context) error

ClearAll drops and recreates the Milvus collection, effectively removing all data. The collection schema and index will be recreated on the next AddDocuments call.

func (*MilvusStore) Count ¶

func (s *MilvusStore) Count(ctx context.Context) (int, error)

Count returns the number of documents in the Milvus collection.

func (*MilvusStore) DeleteDocuments ¶

func (s *MilvusStore) DeleteDocuments(ctx context.Context, ids []string) error

DeleteDocuments deletes documents from the Milvus collection.

func (*MilvusStore) DropCollection ¶

func (s *MilvusStore) DropCollection(ctx context.Context) error

DropCollection drops the collection (use with caution).

func (*MilvusStore) Flush ¶

func (s *MilvusStore) Flush(ctx context.Context) error

Flush flushes the collection to ensure data persistence.

func (*MilvusStore) ListDocumentIDs ¶

func (s *MilvusStore) ListDocumentIDs(ctx context.Context, limit int, offset int) ([]string, error)

ListDocumentIDs returns a paginated list of document IDs stored in the Milvus collection. It uses the query API with an output field of "doc_id" to retrieve original document IDs.

func (*MilvusStore) Search ¶

func (s *MilvusStore) Search(ctx context.Context, queryEmbedding []float64, topK int) ([]VectorSearchResult, error)

Search searches the Milvus collection for similar documents.

func (*MilvusStore) UpdateDocument ¶

func (s *MilvusStore) UpdateDocument(ctx context.Context, doc Document) error

UpdateDocument updates a document in the Milvus collection.

type MultiHopConfig ¶

// MultiHopConfig configures the multi-hop reasoning system.
type MultiHopConfig struct {
	// Hop limits
	MaxHops      int           `json:"max_hops"`      // Maximum number of hops (2-5)
	MinHops      int           `json:"min_hops"`      // Minimum hops before stopping
	HopTimeout   time.Duration `json:"hop_timeout"`   // Timeout per hop
	TotalTimeout time.Duration `json:"total_timeout"` // Total reasoning timeout

	// Retrieval settings
	ResultsPerHop     int     `json:"results_per_hop"`     // Documents per hop
	MinConfidence     float64 `json:"min_confidence"`      // Minimum confidence to continue
	ContextWindowSize int     `json:"context_window_size"` // Max context tokens

	// Reasoning settings
	EnableLLMReasoning    bool    `json:"enable_llm_reasoning"`    // Use LLM for reasoning
	EnableQueryRefinement bool    `json:"enable_query_refinement"` // Refine queries between hops
	EnableVerification    bool    `json:"enable_verification"`     // Verify answers
	ConfidenceThreshold   float64 `json:"confidence_threshold"`    // Stop if confidence exceeds

	// Deduplication
	DeduplicateResults  bool    `json:"deduplicate_results"`  // Remove duplicate documents
	SimilarityThreshold float64 `json:"similarity_threshold"` // Threshold for deduplication

	// Caching
	EnableCache bool          `json:"enable_cache"`
	CacheTTL    time.Duration `json:"cache_ttl"`
}

MultiHopConfig 配置多跳推理系统

func DefaultMultiHopConfig ¶

func DefaultMultiHopConfig() MultiHopConfig

DefaultMultiHopConfig returns the default configuration.

type MultiHopReasoner ¶

type MultiHopReasoner struct {
	// contains filtered or unexported fields
}

MultiHopReasoner 对文档进行多跳推理

func NewMultiHopReasoner ¶

func NewMultiHopReasoner(
	config MultiHopConfig,
	retriever *HybridRetriever,
	queryTransformer *QueryTransformer,
	llmProvider QueryLLMProvider,
	embeddingFunc func(context.Context, string) ([]float64, error),
	logger *zap.Logger,
) *MultiHopReasoner

NewMultiHopReasoner creates a new multi-hop reasoner.

func (*MultiHopReasoner) Reason ¶

func (r *MultiHopReasoner) Reason(ctx context.Context, query string) (*ReasoningChain, error)

Reason performs multi-hop reasoning for a query.

func (*MultiHopReasoner) ReasonBatch ¶

func (r *MultiHopReasoner) ReasonBatch(ctx context.Context, queries []string) ([]*ReasoningChain, error)

ReasonBatch performs multi-hop reasoning for multiple queries.

type MultiStrategyDecision ¶

type MultiStrategyDecision struct {
	Query      string               `json:"query"`
	Strategies []StrategyWithWeight `json:"strategies"`
	Reasoning  string               `json:"reasoning,omitempty"`
	Timestamp  time.Time            `json:"timestamp"`
}

MultiStrategyDecision represents a decision to use multiple strategies.

type Node ¶

type Node struct {
	ID         string         `json:"id"`
	Type       string         `json:"type"`
	Label      string         `json:"label"`
	Properties map[string]any `json:"properties,omitempty"`
	Embedding  []float64      `json:"embedding,omitempty"`
	CreatedAt  time.Time      `json:"created_at"`
}

Node represents a node in the knowledge graph.

type PineconeConfig ¶

// PineconeConfig configures the Pinecone VectorStore implementation.
type PineconeConfig struct {
	APIKey    string        `json:"api_key"`
	Index     string        `json:"index,omitempty"`    // Used to resolve BaseURL if BaseURL is empty
	BaseURL   string        `json:"base_url,omitempty"` // Data-plane base URL (preferred if known)
	Namespace string        `json:"namespace,omitempty"`
	Timeout   time.Duration `json:"timeout,omitempty"`

	ControllerBaseURL string `json:"controller_base_url,omitempty"` // Default: https://api.pinecone.io

	// Payload field stored in the metadata.
	MetadataContentField string `json:"metadata_content_field,omitempty"` // Default: "content"
}

PineconeConfig configures the Pinecone VectorStore implementation.

To use Pinecone you need either: - BaseURL (the data-plane host, e.g. https://<index>-<project>.svc.<region>.pinecone.io), or - Index, in which case the store resolves the host via the controller API.

type PineconeStore ¶

type PineconeStore struct {
	// contains filtered or unexported fields
}

PineconeStore implements VectorStore using Pinecone's REST API.

func NewPineconeStore ¶

func NewPineconeStore(cfg PineconeConfig, logger *zap.Logger) *PineconeStore

NewPineconeStore creates a Pinecone-backed VectorStore.

func (*PineconeStore) AddDocuments ¶

func (s *PineconeStore) AddDocuments(ctx context.Context, docs []Document) error

func (*PineconeStore) ClearAll ¶

func (s *PineconeStore) ClearAll(ctx context.Context) error

ClearAll deletes all vectors from the Pinecone index (or namespace).

func (*PineconeStore) Count ¶

func (s *PineconeStore) Count(ctx context.Context) (int, error)

func (*PineconeStore) DeleteDocuments ¶

func (s *PineconeStore) DeleteDocuments(ctx context.Context, ids []string) error

func (*PineconeStore) ListDocumentIDs ¶

func (s *PineconeStore) ListDocumentIDs(ctx context.Context, limit int, offset int) ([]string, error)

ListDocumentIDs returns a paginated list of vector IDs stored in the Pinecone index. It uses the list vectors API endpoint. The offset parameter is used to skip results by fetching offset+limit IDs and discarding the first offset entries.

func (*PineconeStore) Search ¶

func (s *PineconeStore) Search(ctx context.Context, queryEmbedding []float64, topK int) ([]VectorSearchResult, error)

func (*PineconeStore) UpdateDocument ¶

func (s *PineconeStore) UpdateDocument(ctx context.Context, doc Document) error

type QdrantConfig ¶

type QdrantConfig struct {
	Host       string        `json:"host"`
	Port       int           `json:"port"`
	BaseURL    string        `json:"base_url,omitempty"`
	APIKey     string        `json:"api_key,omitempty"`
	Collection string        `json:"collection"`
	Timeout    time.Duration `json:"timeout,omitempty"`

	AutoCreateCollection bool   `json:"auto_create_collection,omitempty"`
	Distance             string `json:"distance,omitempty"`     // Cosine (default), Dot, Euclid
	VectorSize           int    `json:"vector_size,omitempty"`  // Optional override; defaults to len(embedding)
	Wait                 *bool  `json:"wait,omitempty"`         // Wait for operation completion (default true)
	PayloadContentField  string `json:"payload_content_field"`  // Payload key for document content (default "content")
	PayloadMetadataField string `json:"payload_metadata_field"` // Payload key for document metadata (default "metadata")
	PayloadIDField       string `json:"payload_id_field"`       // Payload key for original document ID (default "doc_id")
}

QdrantConfig configures the Qdrant VectorStore implementation.

Notes: - Qdrant point IDs are UUIDs; AgentFlow derives a stable UUID from Document.ID. - Document content/metadata are stored in the payload (best-effort JSON).

type QdrantStore ¶

type QdrantStore struct {
	// contains filtered or unexported fields
}

QdrantStore implements VectorStore using Qdrant's REST API.

func NewQdrantStore ¶

func NewQdrantStore(cfg QdrantConfig, logger *zap.Logger) *QdrantStore

NewQdrantStore creates a Qdrant-backed VectorStore.

func (*QdrantStore) AddDocuments ¶

func (s *QdrantStore) AddDocuments(ctx context.Context, docs []Document) error

func (*QdrantStore) ClearAll ¶

func (s *QdrantStore) ClearAll(ctx context.Context) error

ClearAll deletes all points from the Qdrant collection. It uses the delete-by-filter API with a match-all filter to remove all points while preserving the collection schema.

func (*QdrantStore) Count ¶

func (s *QdrantStore) Count(ctx context.Context) (int, error)

func (*QdrantStore) DeleteDocuments ¶

func (s *QdrantStore) DeleteDocuments(ctx context.Context, ids []string) error

func (*QdrantStore) ListDocumentIDs ¶

func (s *QdrantStore) ListDocumentIDs(ctx context.Context, limit int, offset int) ([]string, error)

ListDocumentIDs returns a paginated list of document IDs stored in the Qdrant collection. It uses the scroll API to retrieve points and extracts the original document ID from the payload.

func (*QdrantStore) Search ¶

func (s *QdrantStore) Search(ctx context.Context, queryEmbedding []float64, topK int) ([]VectorSearchResult, error)

func (*QdrantStore) UpdateDocument ¶

func (s *QdrantStore) UpdateDocument(ctx context.Context, doc Document) error

type QueryDocPair ¶

type QueryDocPair struct {
	Query    string
	Document string
}

QueryDocPair 查询-文档对

type QueryFeatures ¶

type QueryFeatures struct {
	Intent      QueryIntent `json:"intent"`
	Complexity  string      `json:"complexity"` // "low", "medium", "high"
	Length      string      `json:"length"`     // "short", "medium", "long"
	HasEntities bool        `json:"has_entities"`
	HasKeywords bool        `json:"has_keywords"`
	IsQuestion  bool        `json:"is_question"`
	Keywords    []string    `json:"keywords"`
	Entities    []string    `json:"entities"`
	WordCount   int         `json:"word_count"`
}

QueryFeatures represents the analyzed features of a query.

type QueryIntent ¶

type QueryIntent string

QueryIntent represents the detected intent of a user query.

const (
	IntentFactual      QueryIntent = "factual"      // Simple fact lookup
	IntentComparison   QueryIntent = "comparison"   // Compare multiple items
	IntentExplanation  QueryIntent = "explanation"  // Explain a concept
	IntentProcedural   QueryIntent = "procedural"   // How-to questions
	IntentAnalytical   QueryIntent = "analytical"   // Analysis/reasoning required
	IntentCreative     QueryIntent = "creative"     // Creative/generative tasks
	IntentAggregation  QueryIntent = "aggregation"  // Aggregate information
	IntentTemporal     QueryIntent = "temporal"     // Time-based queries
	IntentCausal       QueryIntent = "causal"       // Cause-effect relationships
	IntentHypothetical QueryIntent = "hypothetical" // What-if scenarios
	IntentUnknown      QueryIntent = "unknown"      // Cannot determine intent
)

type QueryLLMProvider ¶

// QueryLLMProvider is the LLM interface used for query operations.
type QueryLLMProvider interface {
	// Complete generates a completion for the given prompt.
	Complete(ctx context.Context, prompt string) (string, error)
}

QueryLLMProvider is the interface for LLM-based query operations.

type QueryRouter ¶

type QueryRouter struct {
	// contains filtered or unexported fields
}

QueryRouter routes queries to the appropriate retrieval strategy.

func NewQueryRouter ¶

func NewQueryRouter(
	config QueryRouterConfig,
	queryTransformer *QueryTransformer,
	llmProvider QueryLLMProvider,
	logger *zap.Logger,
) *QueryRouter

NewQueryRouter creates a new query router.

func (*QueryRouter) GetStrategyStats ¶

func (r *QueryRouter) GetStrategyStats() map[RetrievalStrategy]StrategyStats

GetStrategyStats returns the statistics for each strategy.

func (*QueryRouter) RecordFeedback ¶

func (r *QueryRouter) RecordFeedback(feedback RoutingFeedback)

RecordFeedback records feedback on a routing decision.

func (*QueryRouter) Route ¶

func (r *QueryRouter) Route(ctx context.Context, query string) (*RoutingDecision, error)

Route determines the best retrieval strategy for a query.

func (*QueryRouter) RouteBatch ¶

func (r *QueryRouter) RouteBatch(ctx context.Context, queries []string) ([]*RoutingDecision, error)

RouteBatch routes multiple queries.

func (*QueryRouter) RouteMulti ¶

func (r *QueryRouter) RouteMulti(ctx context.Context, query string, maxStrategies int) (*MultiStrategyDecision, error)

RouteMulti determines multiple retrieval strategies to combine.

type QueryRouterConfig ¶

// QueryRouterConfig configures the query router.
type QueryRouterConfig struct {
	// Strategy configuration
	Strategies []StrategyConfig `json:"strategies"`

	// Default strategy
	DefaultStrategy RetrievalStrategy `json:"default_strategy"`

	// Routing settings
	EnableLLMRouting      bool    `json:"enable_llm_routing"`      // Use LLM for routing decisions
	EnableAdaptiveRouting bool    `json:"enable_adaptive_routing"` // Learn from feedback
	ConfidenceThreshold   float64 `json:"confidence_threshold"`    // Min confidence for routing

	// Fallback settings
	EnableFallback   bool              `json:"enable_fallback"`
	FallbackStrategy RetrievalStrategy `json:"fallback_strategy"`

	// Caching
	EnableCache bool          `json:"enable_cache"`
	CacheTTL    time.Duration `json:"cache_ttl"`

	// Logging
	LogDecisions bool `json:"log_decisions"`
}

QueryRouterConfig configures the query router.

func DefaultQueryRouterConfig ¶

func DefaultQueryRouterConfig() QueryRouterConfig

DefaultQueryRouterConfig returns the default configuration.

type QueryTransformConfig ¶

// QueryTransformConfig configures the query transformer.
type QueryTransformConfig struct {
	// Expansion settings
	EnableExpansion    bool    `json:"enable_expansion"`
	MaxExpansions      int     `json:"max_expansions"`      // Max expanded queries (3-5)
	ExpansionDiversity float64 `json:"expansion_diversity"` // 0-1, higher = more diverse

	// Rewriting settings
	EnableRewriting     bool `json:"enable_rewriting"`
	RewriteForRetrieval bool `json:"rewrite_for_retrieval"` // Optimize for retrieval

	// Decomposition settings
	EnableDecomposition bool    `json:"enable_decomposition"`
	MaxSubQueries       int     `json:"max_sub_queries"`     // Max sub-queries (2-5)
	DecomposeThreshold  float64 `json:"decompose_threshold"` // Complexity threshold

	// Intent detection
	EnableIntentDetection bool `json:"enable_intent_detection"`

	// HyDE (hypothetical document embeddings)
	EnableHyDE        bool `json:"enable_hyde"`
	HyDEDocumentCount int  `json:"hyde_document_count"` // Number of hypothetical docs

	// Step-back prompting
	EnableStepBack bool `json:"enable_step_back"`

	// Caching
	EnableCache bool          `json:"enable_cache"`
	CacheTTL    time.Duration `json:"cache_ttl"`

	// LLM settings
	UseLLM      bool    `json:"use_llm"`     // Use LLM for transformations
	Temperature float64 `json:"temperature"` // LLM temperature
}

QueryTransformConfig configures the query transformer.

func DefaultQueryTransformConfig ¶

func DefaultQueryTransformConfig() QueryTransformConfig

DefaultQueryTransformConfig returns the default configuration.

type QueryTransformer ¶

type QueryTransformer struct {
	// contains filtered or unexported fields
}

QueryTransformer transforms queries for better retrieval.

func NewQueryTransformer ¶

func NewQueryTransformer(
	config QueryTransformConfig,
	llmProvider QueryLLMProvider,
	logger *zap.Logger,
) *QueryTransformer

NewQueryTransformer creates a new query transformer.

func (*QueryTransformer) Expand ¶

func (t *QueryTransformer) Expand(ctx context.Context, query string) ([]string, error)

Expand generates multiple related queries for better recall.

func (*QueryTransformer) ExpandWithMetadata ¶

func (t *QueryTransformer) ExpandWithMetadata(ctx context.Context, query string) (*ExpansionResult, error)

ExpandWithMetadata expands the query and returns detailed results.

func (*QueryTransformer) Transform ¶

func (t *QueryTransformer) Transform(ctx context.Context, query string) (*TransformedQuery, error)

Transform applies all enabled transformations to the query.

func (*QueryTransformer) TransformBatch ¶

func (t *QueryTransformer) TransformBatch(ctx context.Context, queries []string) ([]*TransformedQuery, error)

TransformBatch 同步转换多个查询

type ReasoningChain ¶

// ReasoningChain represents a complete reasoning chain.
type ReasoningChain struct {
	ID              string          `json:"id"`
	OriginalQuery   string          `json:"original_query"`
	Hops            []ReasoningHop  `json:"hops"`
	FinalAnswer     string          `json:"final_answer,omitempty"`
	FinalContext    string          `json:"final_context"`
	Status          ReasoningStatus `json:"status"`
	TotalDuration   time.Duration   `json:"total_duration"`
	TotalRetrieval  int             `json:"total_retrieval"`  // Total documents retrieved
	UniqueDocuments int             `json:"unique_documents"` // Unique documents
	Metadata        map[string]any  `json:"metadata,omitempty"`
	CreatedAt       time.Time       `json:"created_at"`
	CompletedAt     time.Time       `json:"completed_at,omitempty"`

	// Global deduplication statistics (newly added)
	TotalDedupByID         int `json:"total_dedup_by_id"`
	TotalDedupBySimilarity int `json:"total_dedup_by_similarity"`
}

ReasoningChain represents a complete reasoning chain.

func (*ReasoningChain) FromJSON ¶

func (c *ReasoningChain) FromJSON(data []byte) error

FromJSON parses a reasoning chain from JSON.

func (*ReasoningChain) GetAllDocuments ¶

func (c *ReasoningChain) GetAllDocuments() []Document

GetAllDocuments returns all unique documents from the chain.

func (*ReasoningChain) GetHop ¶

func (c *ReasoningChain) GetHop(hopNum int) *ReasoningHop

GetHop returns a specific hop by number.

func (*ReasoningChain) GetTopDocuments ¶

func (c *ReasoningChain) GetTopDocuments(k int) []RetrievalResult

GetTopDocuments returns the top documents by score across all hops.

func (*ReasoningChain) ToJSON ¶

func (c *ReasoningChain) ToJSON() ([]byte, error)

ToJSON serializes the reasoning chain to JSON.

func (*ReasoningChain) Visualize ¶

func (c *ReasoningChain) Visualize() *ChainVisualization

Visualize creates a visualization of the reasoning chain.

type ReasoningHop ¶

// ReasoningHop represents a single hop in the reasoning chain.
type ReasoningHop struct {
	ID               string            `json:"id"`
	HopNumber        int               `json:"hop_number"`
	Type             HopType           `json:"type"`
	Query            string            `json:"query"`
	TransformedQuery string            `json:"transformed_query,omitempty"`
	Results          []RetrievalResult `json:"results"`
	Context          string            `json:"context,omitempty"`   // Accumulated context
	Reasoning        string            `json:"reasoning,omitempty"` // LLM reasoning for this hop
	Confidence       float64           `json:"confidence"`
	Duration         time.Duration     `json:"duration"`
	Metadata         map[string]any    `json:"metadata,omitempty"`
	Timestamp        time.Time         `json:"timestamp"`

	// Deduplication statistics (newly added)
	DedupStats *DedupStats `json:"dedup_stats,omitempty"`
}

ReasoningHop represents a single hop in the reasoning chain.

type ReasoningStatus ¶

type ReasoningStatus string

ReasoningStatus represents the status of the reasoning process.

const (
	StatusInProgress ReasoningStatus = "in_progress"
	StatusCompleted  ReasoningStatus = "completed"
	StatusFailed     ReasoningStatus = "failed"
	StatusTimeout    ReasoningStatus = "timeout"
)

type RerankProvider ¶

type RerankProvider interface {
	RerankSimple(ctx context.Context, query string, documents []string, topN int) ([]rerank.RerankResult, error)
	Name() string
}

RerankProvider wraps a rerank provider for use by retrieval.

type RerankProviderType ¶

type RerankProviderType string

RerankProviderType 标识要创建的重排提供者。

const (
	RerankCohere RerankProviderType = "cohere"
	RerankVoyage RerankProviderType = "voyage"
	RerankJina   RerankProviderType = "jina"
)

type Reranker ¶

// Reranker is the reranker interface.
type Reranker interface {
	// Rerank reranks the results.
	Rerank(ctx context.Context, query string, results []RetrievalResult) ([]RetrievalResult, error)
}

Reranker 重排序器接口

type RerankerType ¶

type RerankerType string

RerankerType 重排序器类型

const (
	RerankerSimple       RerankerType = "simple"        // simple word overlap
	RerankerCrossEncoder RerankerType = "cross_encoder" // Cross-Encoder model
	RerankerLLM          RerankerType = "llm"           // LLM reranking
)

type RetrievalResult ¶

type RetrievalResult struct {
	Document    Document `json:"document"`
	BM25Score   float64  `json:"bm25_score"`
	VectorScore float64  `json:"vector_score"`
	HybridScore float64  `json:"hybrid_score"`
	RerankScore float64  `json:"rerank_score,omitempty"`
	FinalScore  float64  `json:"final_score"`
}

RetrievalResult 检索结果

type RetrievalStrategy ¶

type RetrievalStrategy string

RetrievalStrategy represents a retrieval strategy.

const (
	StrategyVector     RetrievalStrategy = "vector"     // Pure vector/semantic search
	StrategyBM25       RetrievalStrategy = "bm25"       // Pure keyword/BM25 search
	StrategyHybrid     RetrievalStrategy = "hybrid"     // Combined vector + BM25
	StrategyMultiHop   RetrievalStrategy = "multi_hop"  // Multi-hop reasoning
	StrategyGraphRAG   RetrievalStrategy = "graph_rag"  // Graph-based retrieval
	StrategyContextual RetrievalStrategy = "contextual" // Contextual retrieval
	StrategyDense      RetrievalStrategy = "dense"      // Dense passage retrieval
	StrategySparse     RetrievalStrategy = "sparse"     // Sparse retrieval (TF-IDF)
)

type RetrieverOption ¶

type RetrieverOption func(*retrieverOptions)

RetrieverOption 配置 NewRetrieverFromConfig 的可选参数。

func WithEmbeddingType ¶

func WithEmbeddingType(t EmbeddingProviderType) RetrieverOption

WithEmbeddingType 指定嵌入提供者类型。

func WithLogger ¶

func WithLogger(l *zap.Logger) RetrieverOption

WithLogger 设置日志记录器。

func WithRerankType ¶

func WithRerankType(t RerankProviderType) RetrieverOption

WithRerankType 指定重排提供者类型。

type RoutingCondition ¶

type RoutingCondition struct {
	Type     string  `json:"type"`     // "intent", "keyword", "length", "complexity"
	Value    string  `json:"value"`    // Value to match
	Weight   float64 `json:"weight"`   // Weight adjustment when matched
	Operator string  `json:"operator"` // "equals", "contains", "greater", "less"
}

RoutingCondition represents a routing condition.

type RoutingDecision ¶

type RoutingDecision struct {
	Query            string                        `json:"query"`
	SelectedStrategy RetrievalStrategy             `json:"selected_strategy"`
	Confidence       float64                       `json:"confidence"`
	Scores           map[RetrievalStrategy]float64 `json:"scores"`
	Reasoning        string                        `json:"reasoning,omitempty"`
	Metadata         map[string]any                `json:"metadata,omitempty"`
	Timestamp        time.Time                     `json:"timestamp"`
}

RoutingDecision represents the routing decision for a query.

func (*RoutingDecision) FromJSON ¶

func (d *RoutingDecision) FromJSON(data []byte) error

FromJSON deserializes a routing decision from JSON.

func (*RoutingDecision) ToJSON ¶

func (d *RoutingDecision) ToJSON() ([]byte, error)

ToJSON serializes the decision to JSON.

type RoutingFeedback ¶

type RoutingFeedback struct {
	Query            string            `json:"query"`
	SelectedStrategy RetrievalStrategy `json:"selected_strategy"`
	Success          bool              `json:"success"`
	Score            float64           `json:"score"`
	Timestamp        time.Time         `json:"timestamp"`
}

RoutingFeedback represents feedback on a routing decision.

type SearchResult ¶

type SearchResult struct {
	ID       string
	Distance float64
	Score    float64 // 1 - distance (for cosine)
}

SearchResult 搜索结果

type SemanticCache ¶

type SemanticCache struct {
	// contains filtered or unexported fields
}

SemanticCache 语义缓存（基于向量相似度）

func NewSemanticCache ¶

func NewSemanticCache(store VectorStore, config SemanticCacheConfig, logger *zap.Logger) *SemanticCache

NewSemanticCache 创建语义缓存

func (*SemanticCache) Clear ¶

func (c *SemanticCache) Clear(ctx context.Context) error

Clear 清空缓存

func (*SemanticCache) Get ¶

func (c *SemanticCache) Get(ctx context.Context, queryEmbedding []float64) (*Document, bool)

Get 从缓存获取

func (*SemanticCache) Set ¶

func (c *SemanticCache) Set(ctx context.Context, doc Document) error

Set 设置缓存

type SemanticCacheConfig ¶

// SemanticCacheConfig is the semantic cache configuration.
type SemanticCacheConfig struct {
	SimilarityThreshold float64 `json:"similarity_threshold"` // similarity threshold (0.9-0.95)
}

SemanticCacheConfig 语义缓存配置

type SimpleContextProvider ¶

type SimpleContextProvider struct {
	// contains filtered or unexported fields
}

SimpleContextProvider 基于模板的简单上下文提供器。不依赖 LLM，通过提取文档元数据来生成上下文摘要，适用于本地开发、测试和不需要 LLM 调用的场景。

func NewSimpleContextProvider ¶

func NewSimpleContextProvider(logger *zap.Logger) *SimpleContextProvider

NewSimpleContextProvider 创建简单上下文提供器。

func (*SimpleContextProvider) GenerateContext ¶

func (p *SimpleContextProvider) GenerateContext(ctx context.Context, doc Document, chunk string) (string, error)

GenerateContext 为 chunk 生成上下文。基于文档元数据（title、section）和 chunk 内容生成简要上下文描述。

type SimpleGraphEmbedder ¶

type SimpleGraphEmbedder struct {
	// contains filtered or unexported fields
}

SimpleGraphEmbedder 基于词袋模型的简单嵌入生成器。不依赖外部嵌入服务，通过词频统计生成固定维度的向量，适用于本地开发、测试和不需要高质量嵌入的场景。

func NewSimpleGraphEmbedder ¶

func NewSimpleGraphEmbedder(config SimpleGraphEmbedderConfig, logger *zap.Logger) *SimpleGraphEmbedder

NewSimpleGraphEmbedder 创建简单嵌入生成器。

func (*SimpleGraphEmbedder) Embed ¶

func (e *SimpleGraphEmbedder) Embed(ctx context.Context, text string) ([]float64, error)

Embed 为文本生成嵌入向量。使用词袋 + 哈希映射的方式将文本映射到固定维度的向量空间，并进行 L2 归一化。

type SimpleGraphEmbedderConfig ¶

type SimpleGraphEmbedderConfig struct {
	// Dimension 嵌入向量维度，默认 128。
	Dimension int
}

SimpleGraphEmbedderConfig 简单嵌入生成器配置。

type SimpleReranker ¶

type SimpleReranker struct {
	// contains filtered or unexported fields
}

SimpleReranker 简单重排序器（基于词重叠和位置）

func NewSimpleReranker ¶

func NewSimpleReranker(logger *zap.Logger) *SimpleReranker

NewSimpleReranker 创建简单重排序器

func (*SimpleReranker) Rerank ¶

func (r *SimpleReranker) Rerank(ctx context.Context, query string, results []RetrievalResult) ([]RetrievalResult, error)

Rerank 重排序

type SimpleTokenizer ¶

type SimpleTokenizer struct{}

SimpleTokenizer 简单分词器（1 token ≈ 4 字符）。仅用于测试和快速原型。生产环境请使用 NewTiktokenAdapter 创建基于 tiktoken 的分词器。

func (*SimpleTokenizer) CountTokens ¶

func (t *SimpleTokenizer) CountTokens(text string) int

func (*SimpleTokenizer) Encode ¶

func (t *SimpleTokenizer) Encode(text string) []int

type StrategyConfig ¶

type StrategyConfig struct {
	Strategy   RetrievalStrategy  `json:"strategy"`
	Enabled    bool               `json:"enabled"`
	Weight     float64            `json:"weight"`     // Base weight for this strategy
	MinScore   float64            `json:"min_score"`  // Minimum score to use this strategy
	MaxTokens  int                `json:"max_tokens"` // Max query tokens for this strategy
	Conditions []RoutingCondition `json:"conditions"` // Conditions that favor this strategy
}

StrategyConfig 配置一种检索策略。

type StrategyStats ¶

type StrategyStats struct {
	Strategy     RetrievalStrategy `json:"strategy"`
	TotalCalls   int               `json:"total_calls"`
	SuccessRate  float64           `json:"success_rate"`
	AverageScore float64           `json:"average_score"`
}

StrategyStats 表示某个检索策略的统计数据。

type StrategyWithWeight ¶

type StrategyWithWeight struct {
	Strategy RetrievalStrategy `json:"strategy"`
	Weight   float64           `json:"weight"`
}

StrategyWithWeight 表示一个带权重的检索策略。

type StructuralBlock ¶

type StructuralBlock struct {
	Type     string // code, table, text, header
	Content  string
	StartPos int
	EndPos   int
}

StructuralBlock 结构块

type Tokenizer ¶

type Tokenizer interface {
	CountTokens(text string) int
	Encode(text string) []int
}

Tokenizer 分词器接口（RAG 分块专用）。

注意：项目中存在三个 Tokenizer 接口，各自服务不同层次，无法统一：

types.Tokenizer — 框架层，面向 Message/ToolSchema，无 error 返回
llm/tokenizer.Tokenizer — LLM 层，完整编解码 + error 返回 + 模型感知
rag.Tokenizer（本接口） — RAG 分块专用，最小接口（CountTokens + Encode），无 error

若需将 llm/tokenizer.Tokenizer 适配为本接口，使用 NewLLMTokenizerAdapter()。统一会导致循环依赖（rag → types.Message）或强制不必要的方法签名变更。

func NewEstimatorAdapter ¶

func NewEstimatorAdapter(model string, maxTokens int, logger *zap.Logger) Tokenizer

NewEstimatorAdapter 创建一个基于 llm/tokenizer.EstimatorTokenizer 的 rag.Tokenizer 适配器。比 SimpleTokenizer 更精确（CJK 感知），且不需要外部编码数据下载。 model 参数仅用于标识，maxTokens 指定模型上下文长度（0 使用默认值 4096）。

func NewTiktokenAdapter ¶

func NewTiktokenAdapter(model string, logger *zap.Logger) (Tokenizer, error)

NewTiktokenAdapter 创建一个基于 tiktoken 的 rag.Tokenizer 适配器。 model 参数指定 tiktoken 模型（如 "gpt-4o", "gpt-4", "gpt-3.5-turbo"）。

type TransformationType ¶

type TransformationType string

TransformationType 表示查询转换的类型。

const (
	TransformExpansion     TransformationType = "expansion"     // Generate related queries
	TransformRewrite       TransformationType = "rewrite"       // Rewrite for better retrieval
	TransformDecomposition TransformationType = "decomposition" // Break into sub-queries
	TransformHyDE          TransformationType = "hyde"          // Hypothetical Document Embedding
	TransformStepBack      TransformationType = "step_back"     // Step-back prompting
)

type TransformedQuery ¶

type TransformedQuery struct {
	Original    string             `json:"original"`
	Transformed string             `json:"transformed"`
	Type        TransformationType `json:"type"`
	Intent      QueryIntent        `json:"intent,omitempty"`
	Confidence  float64            `json:"confidence"`
	SubQueries  []string           `json:"sub_queries,omitempty"`
	Keywords    []string           `json:"keywords,omitempty"`
	Entities    []string           `json:"entities,omitempty"`
	Metadata    map[string]any     `json:"metadata,omitempty"`
}

TransformedQuery 表示一个带有元数据的转换后查询。

func (*TransformedQuery) FromJSON ¶

func (tq *TransformedQuery) FromJSON(data []byte) error

FromJSON 从 JSON 反序列化出转换后的查询。

func (*TransformedQuery) ToJSON ¶

func (tq *TransformedQuery) ToJSON() ([]byte, error)

ToJSON 将变形查询序列化为 JSON

type Triple ¶

type Triple struct {
	Subject   string `json:"subject"`
	Predicate string `json:"predicate"`
	Object    string `json:"object"`
}

Triple 表示一个主语-谓语-宾语（subject-predicate-object）三元组。

type VectorIndex ¶

type VectorIndex interface {
	// Build 构建索引
	Build(vectors [][]float64, ids []string) error

	// Search 搜索最近邻
	Search(query []float64, k int) ([]SearchResult, error)

	// Add 添加向量
	Add(vector []float64, id string) error

	// Delete 删除向量
	Delete(id string) error

	// Size 索引大小
	Size() int
}

VectorIndex 向量索引接口

type VectorSearchResult ¶

type VectorSearchResult struct {
	Document Document `json:"document"`
	Score    float64  `json:"score"`
	Distance float64  `json:"distance"`
}

VectorSearchResult 向量搜索结果

type VectorStore ¶

type VectorStore interface {
	// 添加文档
	AddDocuments(ctx context.Context, docs []Document) error

	// 搜索相似文档
	Search(ctx context.Context, queryEmbedding []float64, topK int) ([]VectorSearchResult, error)

	// 删除文档
	DeleteDocuments(ctx context.Context, ids []string) error

	// 更新文档
	UpdateDocument(ctx context.Context, doc Document) error

	// 获取文档数量
	Count(ctx context.Context) (int, error)
}

VectorStore 向量数据库接口

func NewPineconeVectorStore ¶

func NewPineconeVectorStore(cfg PineconeConfig, logger *zap.Logger) VectorStore

NewPineconeVectorStore creates a Pinecone-backed VectorStore from a PineconeConfig. Use this when Pinecone configuration is not part of the global config.Config.

func NewVectorStoreFromConfig ¶

func NewVectorStoreFromConfig(cfg *config.Config, storeType VectorStoreType, logger *zap.Logger) (VectorStore, error)

NewVectorStoreFromConfig 根据指定的后端类型和全局配置创建 VectorStore。当 storeType 为空字符串时，默认使用 InMemory 后端。

type VectorStoreType ¶

type VectorStoreType string

VectorStoreType 标识要创建的向量存储后端。

const (
	VectorStoreMemory   VectorStoreType = "memory"
	VectorStoreQdrant   VectorStoreType = "qdrant"
	VectorStoreWeaviate VectorStoreType = "weaviate"
	VectorStoreMilvus   VectorStoreType = "milvus"
	VectorStorePinecone VectorStoreType = "pinecone"
)

type VisualizationEdge ¶

type VisualizationEdge struct {
	Source string  `json:"source"`
	Target string  `json:"target"`
	Label  string  `json:"label,omitempty"`
	Weight float64 `json:"weight,omitempty"`
}

VisualizationEdge 表示可视化图中的一条边。

type VisualizationNode ¶

type VisualizationNode struct {
	ID       string         `json:"id"`
	Type     string         `json:"type"` // "query", "hop", "document", "answer"
	Label    string         `json:"label"`
	Metadata map[string]any `json:"metadata,omitempty"`
}

VisualizationNode 表示可视化图中的一个节点。

type WeaviateConfig ¶

type WeaviateConfig struct {
	// 连接设置
	Host    string `json:"host"`               // Weaviate host (default: localhost)
	Port    int    `json:"port"`               // Weaviate port (default: 8080)
	Scheme  string `json:"scheme,omitempty"`   // http or https (default: http)
	BaseURL string `json:"base_url,omitempty"` // Full base URL (overrides host/port/scheme)

	// 认证
	APIKey string `json:"api_key,omitempty"` // API key for authentication

	// 类/集合设置
	ClassName string `json:"class_name"` // Weaviate class name (required)

	// Schema 设置
	AutoCreateSchema bool   `json:"auto_create_schema,omitempty"` // Auto-create class if not exists
	VectorIndexType  string `json:"vector_index_type,omitempty"`  // hnsw (default), flat
	Distance         string `json:"distance,omitempty"`           // cosine (default), dot, l2, hamming, manhattan

	// 向量设置
	VectorSize int `json:"vector_size,omitempty"` // Vector dimension (optional, auto-detected)

	// 混合搜索设置
	HybridAlpha float64 `json:"hybrid_alpha,omitempty"` // Alpha for hybrid search (0=BM25, 1=vector, default: 0.5)

	// 超时设置
	Timeout time.Duration `json:"timeout,omitempty"` // Request timeout (default: 30s)

	// 属性字段名称
	ContentProperty  string `json:"content_property,omitempty"`  // Property for document content (default: content)
	MetadataProperty string `json:"metadata_property,omitempty"` // Property for document metadata (default: metadata)
	DocIDProperty    string `json:"doc_id_property,omitempty"`   // Property for original document ID (default: docId)
}

WeaviateConfig 配置 Weaviate VectorStore 实现。

Weaviate 是一个开源向量数据库，支持： - 支持多种距离度量的向量搜索 - BM25 关键词搜索 - 混合搜索（结合向量与 BM25） - 用于灵活查询的 GraphQL API - 自动 schema 管理

type WeaviateStore ¶

type WeaviateStore struct {
	// contains filtered or unexported fields
}

WeaviateStore 使用 Weaviate 的 REST 和 GraphQL API 实现 VectorStore 接口。

func NewWeaviateStore ¶

func NewWeaviateStore(cfg WeaviateConfig, logger *zap.Logger) *WeaviateStore

NewWeaviateStore 创建一个由 Weaviate 支撑的 VectorStore。

func (*WeaviateStore) AddDocuments ¶

func (s *WeaviateStore) AddDocuments(ctx context.Context, docs []Document) error

AddDocuments 将文档添加到 Weaviate 存储中。

func (*WeaviateStore) BM25Search ¶

func (s *WeaviateStore) BM25Search(ctx context.Context, queryText string, topK int) ([]VectorSearchResult, error)

BM25Search执行基于关键词的BM25搜索.

func (*WeaviateStore) ClearAll ¶

func (s *WeaviateStore) ClearAll(ctx context.Context) error

ClearAll deletes the entire Weaviate class and resets the schema guard so it can be recreated on the next AddDocuments call.

func (*WeaviateStore) Count ¶

func (s *WeaviateStore) Count(ctx context.Context) (int, error)

Count 返回集合中的文档总数。

func (*WeaviateStore) DeleteClass ¶

func (s *WeaviateStore) DeleteClass(ctx context.Context) error

DeleteClass 删除整个 Weaviate 类（谨慎使用）。

func (*WeaviateStore) DeleteDocuments ¶

func (s *WeaviateStore) DeleteDocuments(ctx context.Context, ids []string) error

DeleteDocuments 根据 ID 删除文档。

func (*WeaviateStore) GetSchema ¶

func (s *WeaviateStore) GetSchema(ctx context.Context) (map[string]any, error)

GetSchema 返回当前类的 schema。

func (*WeaviateStore) HybridSearch ¶

func (s *WeaviateStore) HybridSearch(ctx context.Context, queryText string, queryEmbedding []float64, topK int) ([]VectorSearchResult, error)

HybridSearch 执行混合搜索，结合向量相似度和 BM25。

func (*WeaviateStore) ListDocumentIDs ¶

func (s *WeaviateStore) ListDocumentIDs(ctx context.Context, limit int, offset int) ([]string, error)

ListDocumentIDs returns a paginated list of document IDs stored in the Weaviate class. It uses a GraphQL query to retrieve the docId property with limit and offset.

func (*WeaviateStore) Search ¶

func (s *WeaviateStore) Search(ctx context.Context, queryEmbedding []float64, topK int) ([]VectorSearchResult, error)

Search 执行向量相似度搜索。

func (*WeaviateStore) UpdateDocument ¶

func (s *WeaviateStore) UpdateDocument(ctx context.Context, doc Document) error

UpdateDocument 更新一个文档（upsert）。

type WebRetrievalResult ¶

type WebRetrievalResult struct {
	URL     string  `json:"url"`
	Title   string  `json:"title"`
	Content string  `json:"content"`
	Score   float64 `json:"score"`
}

WebRetrievalResult 表示适配给 RAG 使用的网络搜索结果。

type WebRetriever ¶

type WebRetriever struct {
	// contains filtered or unexported fields
}

WebRetriever 将本地 RAG 检索与实时网络搜索相结合。它按可配置的权重合并两种来源的结果，并在其中一个来源失败时提供回退行为。

func NewWebRetriever ¶

func NewWebRetriever(
	config WebRetrieverConfig,
	localRetriever *HybridRetriever,
	webSearchFn WebSearchFunc,
	logger *zap.Logger,
) *WebRetriever

NewWebRetriever 创建一个新的网络增强检索器。

func (*WebRetriever) Retrieve ¶

func (wr *WebRetriever) Retrieve(ctx context.Context, query string, queryEmbedding []float64) ([]RetrievalResult, error)

Retrieve 为给定查询执行本地 + 网络的混合检索。

type WebRetrieverConfig ¶

type WebRetrieverConfig struct {
	// 本地结果与网络结果之间的权重分配
	LocalWeight float64 `json:"local_weight"` // Weight for local RAG results (0-1)
	WebWeight   float64 `json:"web_weight"`   // Weight for web search results (0-1)

	// 网络搜索设置
	MaxWebResults    int           `json:"max_web_results"`    // Maximum web results to fetch
	WebSearchTimeout time.Duration `json:"web_search_timeout"` // Timeout for web search
	ParallelSearch   bool          `json:"parallel_search"`    // Search local and web in parallel

	// 结果合并
	TopK             int     `json:"top_k"`              // Final number of results to return
	MinScore         float64 `json:"min_score"`          // Minimum score threshold
	DeduplicateByURL bool    `json:"deduplicate_by_url"` // Remove duplicate URLs

	// 缓存
	EnableCache bool          `json:"enable_cache"` // Cache web results
	CacheTTL    time.Duration `json:"cache_ttl"`    // Cache time-to-live

	// 回退行为
	FallbackToLocal bool `json:"fallback_to_local"` // Use local-only if web fails
	FallbackToWeb   bool `json:"fallback_to_web"`   // Use web-only if local fails
}

WebRetrieverConfig 配置了网络增强的检索系统.

func DefaultWebRetrieverConfig ¶

func DefaultWebRetrieverConfig() WebRetrieverConfig

DefaultWebRetrieverConfig 返回合理的默认配置。

type WebSearchFunc ¶

type WebSearchFunc func(ctx context.Context, query string, maxResults int) ([]WebRetrievalResult, error)

WebSearchFunc 定义了用于集成网络搜索的函数签名。这使检索器与具体的网络搜索实现解耦。用户可以将任何 WebSearchProvider（来自 llm/tools）包装成此函数。

Directories ¶

Path	Synopsis
loader
sources

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL