vectorstore

package
v1.4.4-alpha1202-diff-... Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 2, 2025 License: AGPL-3.0 Imports: 39 Imported by: 0

Documentation

Index

Constants

View Source
// Enhancement-plan identifiers.
// NOTE(review): presumably the values accepted by WithRAGEnhance and stored in
// CollectionQueryConfig.EnhancePlan — confirm against the callers.
const (
	BasicPlan                              = "basic" // Original note: "an empty string means no enhancement plan is used". NOTE(review): the value here is "basic", not "" — confirm which is intended.
	EnhancePlanHypotheticalAnswer          = "hypothetical_answer"
	EnhancePlanHypotheticalAnswerWithSplit = "hypothetical_answer_with_split"
	EnhancePlanSplitQuery                  = "split_query"
	EnhancePlanGeneralizeQuery             = "generalize_query"
	EnhancePlanExactKeywordSearch          = "exact_keyword_search"
)
View Source
// Policy names.
// NOTE(review): presumably the values accepted by WithBuildGraphPolicy(policy string) — confirm.
const (
	Policy_UseDBCanche = "DB_Cache" // NOTE(review): "Canche" looks like a misspelling of "Cache"; the name is exported, so renaming it would break callers.
	Policy_UseFilter   = "Filter"
	Policy_None        = "None"
)
View Source
// RAG search result type constants.
// NOTE(review): presumably the values carried in RAGSearchResult.Type — confirm.
const (
	RAGResultTypeMessage   = "message"
	RAGResultEntity        = "entity" // NOTE(review): the only name in this group without the "Type" infix.
	RAGResultTypeMidResult = "mid_result"
	RAGResultTypeResult    = "result"
	RAGResultTypeError     = "error"
	RAGResultTypeERM       = "erm_analysis"
	RAGResultTypeDotGraph  = "dot_graph"
)

RAG 搜索结果类型常量

View Source
// BigTextPlan constants, plus the collection-info document type marker.
const (
	// BigTextPlanChunkText splits a large text into multiple documents that are stored separately.
	BigTextPlanChunkText = "chunkText"

	// BigTextPlanChunkTextAndAvgPooling splits a large text, generates multiple embedding vectors,
	// then average-pools them into a single stored document.
	BigTextPlanChunkTextAndAvgPooling = "chunkTextAndAvgPooling"

	// DocumentTypeCollectionInfo denotes collection information.
	DocumentTypeCollectionInfo = "__collection_info__"
)

BigTextPlan 常量定义

Variables

View Source
// ErrGraphNodesIsEmpty is exported for external error checking; it is assigned
// from the unexported graphNodesIsEmpty value.
var ErrGraphNodesIsEmpty = graphNodesIsEmpty

ErrGraphNodesIsEmpty is exported for external error checking

View Source
// IsMockMode is a package-level flag initialised to false.
// NOTE(review): its effect is not visible here — presumably it switches the package to mock behaviour; confirm.
var IsMockMode = false
View Source
// Vocabulary1024 is a package-level word list; this view elides its 1024 elements.
// NOTE(review): presumably the predefined vocabulary used by NewDefaultMockEmbedding — confirm.
var Vocabulary1024 = []string{}/* 1024 elements not displayed */

Functions

func AIBalanceFreeEmbeddingFunc

func AIBalanceFreeEmbeddingFunc(text string) ([]float32, error)

AIBalanceFreeEmbeddingFunc 全局嵌入函数,使用 AIBalance 免费服务生成文本的嵌入向量

func ChunkText

func ChunkText(text string, maxChunkSize int, overlap int) []string

ChunkText 将长文本分割成多个小块,以便于处理和嵌入。使用 rune 来分割文本,更好地支持 Unicode 字符(如中文)。

func CleanupRedundantServices

func CleanupRedundantServices() error

CleanupRedundantServices 清理多余的 llama-server 进程,只保留一个正常工作的服务。

func CreateCollectionRecord

func CreateCollectionRecord(db *gorm.DB, name string, description string, opts ...CollectionConfigFunc) (*schema.VectorStoreCollection, error)

func DeleteCollection

func DeleteCollection(db *gorm.DB, name string) error

DeleteCollection 删除知识库

func Embedding

func Embedding(text string) ([]float32, error)

Embedding 全局嵌入函数,使用单例服务生成文本的嵌入向量。如果服务未启动,会自动启动;如果无法启动,则报错。

func ExportHNSWGraphToBinary

func ExportHNSWGraphToBinary(graph *hnsw.Graph[string]) (io.Reader, error)

func ExportRAGToBinary

func ExportRAGToBinary(collectionName string, opts ...RAGExportOptionFunc) (io.Reader, error)

ExportRAGToBinary 导出RAG数据为二进制格式

func ExportRAGToFile

func ExportRAGToFile(collectionName string, fileName string, opts ...RAGExportOptionFunc) error

func FormatRagPrompt

func FormatRagPrompt(query string, results []SearchResult, promptTemplate string) string

FormatRagPrompt 格式化 RAG 提示,结合用户问题和检索到的文档

func GetLazyNodeUIDByMd5

func GetLazyNodeUIDByMd5(collectionName string, key string) []byte

func GetServiceStatus

func GetServiceStatus() (bool, string, error)

GetServiceStatus 获取服务状态信息

func HasCollection

func HasCollection(db *gorm.DB, name string) bool

HasCollection 检查知识库是否存在

func ImportRAGFromFile

func ImportRAGFromFile(inputPath string, optFuncs ...RAGExportOptionFunc) error

ImportRAGFromFile 从二进制文件导入RAG数据,支持从文件路径导入

func ImportRAGFromReader

func ImportRAGFromReader(reader io.Reader, optFuncs ...RAGExportOptionFunc) error

ImportRAGFromReader 从二进制流导入RAG数据

func IsAIBalanceFreeServiceAvailable

func IsAIBalanceFreeServiceAvailable() bool

IsAIBalanceFreeServiceAvailable 检查 AIBalance 免费服务是否可用 如果服务尚未初始化,会先尝试初始化

func IsCompatibleEmbeddingModel

func IsCompatibleEmbeddingModel(modelName1, modelName2 string) bool

IsCompatibleEmbeddingModel 检查两个模型名称是否兼容 兼容的模型具有相同的嵌入维度和归一化名称

func IsServiceRunning

func IsServiceRunning() bool

IsServiceRunning 检查嵌入服务是否正在运行

func ListCollections

func ListCollections(db *gorm.DB) []string

ListCollections 获取所有知识库列表

func MethodVerboseName

func MethodVerboseName(i string) string

func MigrateHNSWGraph

func MigrateHNSWGraph(db *gorm.DB, collection *schema.VectorStoreCollection) error

func NewHNSWGraph

func NewHNSWGraph(collectionName string, opts ...hnsw.GraphOption[string]) *hnsw.Graph[string]

func NewVectorStoreDatabase

func NewVectorStoreDatabase(path string) (*gorm.DB, error)

func NormalizeEmbeddingModelName

func NormalizeEmbeddingModelName(modelName string) string

NormalizeEmbeddingModelName 归一化 embedding 模型名称,将各种变体的模型名称统一为标准名称。例如:
- "Qwen3-Embedding-0.6B-Q4_K_M" -> "Qwen3-Embedding-0.6B"
- "Qwen3-Embedding-0.6B" -> "Qwen3-Embedding-0.6B"
- "embedding-free" -> "Qwen3-Embedding-0.6B"

func Query

func Query(db *gorm.DB, query string, opts ...CollectionQueryOption) (<-chan *RAGSearchResult, error)

Query 在RAG系统中搜索多个集合 这个函数直接在RAG级别进行查询,不依赖于知识库结构

func QueryYakitProfile

func QueryYakitProfile(query string, opts ...CollectionQueryOption) (<-chan *RAGSearchResult, error)

func RemoveCollection

func RemoveCollection(db *gorm.DB, collectionName string) error

func ResetAIBalanceFreeService

func ResetAIBalanceFreeService()

ResetAIBalanceFreeService 重置服务单例(仅用于测试或特殊情况)

func ResetService

func ResetService()

ResetService 重置服务单例(仅用于测试或特殊情况)

func SplitDocumentsByMetadata

func SplitDocumentsByMetadata(docs []Document, metadataKey string) map[any][]Document

SplitDocumentsByMetadata 根据元数据字段将文档分组

Types

type AIBalanceFreeEmbedding

// AIBalanceFreeEmbedding is an embedding service client backed by the AIBalance free service.
type AIBalanceFreeEmbedding struct {
	// contains filtered or unexported fields
}

AIBalanceFreeEmbedding 基于 AIBalance 免费服务的嵌入服务客户端

func GetAIBalanceFreeEmbeddingService

func GetAIBalanceFreeEmbeddingService() (*AIBalanceFreeEmbedding, error)

GetAIBalanceFreeEmbeddingService 获取 AIBalance 免费嵌入服务单例 这是一个便捷函数,直接返回单例实例

func NewAIBalanceFreeEmbedder

func NewAIBalanceFreeEmbedder() (*AIBalanceFreeEmbedding, error)

NewAIBalanceFreeEmbedder 创建 AIBalance 免费嵌入客户端单例 该函数使用 sync.Once 确保只创建一次实例,并在创建时检测服务可用性

func (*AIBalanceFreeEmbedding) Embedding

func (a *AIBalanceFreeEmbedding) Embedding(text string) ([]float32, error)

Embedding 实现 EmbeddingClient 接口,生成文本的嵌入向量

func (*AIBalanceFreeEmbedding) EmbeddingRaw

func (a *AIBalanceFreeEmbedding) EmbeddingRaw(text string) ([][]float32, error)

EmbeddingRaw 实现 EmbeddingClient 接口,返回原始的 embedding 结果

func (*AIBalanceFreeEmbedding) GetModelDimension

func (a *AIBalanceFreeEmbedding) GetModelDimension() int

GetModelDimension 返回模型的嵌入向量维度

func (*AIBalanceFreeEmbedding) GetModelName

func (a *AIBalanceFreeEmbedding) GetModelName() string

GetModelName 返回归一化的模型名称 这个方法返回的名称应该与本地 Qwen3-Embedding-0.6B 模型保持一致

func (*AIBalanceFreeEmbedding) GetServiceInfo

func (a *AIBalanceFreeEmbedding) GetServiceInfo() (domain string, model string, available bool)

GetServiceInfo 返回服务信息

func (*AIBalanceFreeEmbedding) IsAvailable

func (a *AIBalanceFreeEmbedding) IsAvailable() bool

IsAvailable 返回服务是否可用

type CollectionConfig

// CollectionConfig holds the settings of a collection. It is built from
// CollectionConfigFunc options (see NewCollectionConfig).
type CollectionConfig struct {
	Description string

	// Whether to force creation of a new knowledge base; if one already exists, an error is returned.
	ForceNew bool

	// Embedding configuration.
	ModelName       string
	Dimension       int
	EmbeddingClient aispec.EmbeddingCaller

	// HNSW configuration.
	DistanceFuncType      string
	MaxNeighbors          int
	LayerGenerationFactor float64
	EfSearch              int
	EfConstruct           int

	EnablePQ                   bool
	EnableAutoUpdateGraphInfos bool
	DisableEmbedCollectionInfo bool
	LazyLoadEmbeddingClient    bool

	DB *gorm.DB

	MaxChunkSize int
	Overlap      int
	BigTextPlan  string

	CacheSize    int
	PreCacheSize int

	KeyAsUID bool

	TryRebuildHNSWIndex bool
	// contains filtered or unexported fields
}

func LoadConfigFromCollectionInfo

func LoadConfigFromCollectionInfo(collection *schema.VectorStoreCollection, options ...CollectionConfigFunc) *CollectionConfig

func NewCollectionConfig

func NewCollectionConfig(options ...CollectionConfigFunc) *CollectionConfig

func (*CollectionConfig) FixEmbeddingClient

func (c *CollectionConfig) FixEmbeddingClient() error

type CollectionConfigFunc

// CollectionConfigFunc is a functional option that receives a *CollectionConfig.
type CollectionConfigFunc func(config *CollectionConfig)

func WithBigTextPlan

func WithBigTextPlan(bigTextPlan string) CollectionConfigFunc

func WithBuildGraphFilter

func WithBuildGraphFilter(filter *yakit.VectorDocumentFilter) CollectionConfigFunc

func WithBuildGraphPolicy

func WithBuildGraphPolicy(policy string) CollectionConfigFunc

func WithCacheSize

func WithCacheSize(cacheSize int) CollectionConfigFunc

func WithCosineDistance

func WithCosineDistance() CollectionConfigFunc

func WithDB

func WithDB(db *gorm.DB) CollectionConfigFunc

WithDB 设置数据库

func WithDescription

func WithDescription(description string) CollectionConfigFunc

func WithDisableEmbedCollectionInfo

func WithDisableEmbedCollectionInfo(enable bool) CollectionConfigFunc

func WithEmbeddingClient

func WithEmbeddingClient(client aispec.EmbeddingCaller) CollectionConfigFunc

WithEmbeddingClient 设置embedding客户端

func WithEmbeddingModel

func WithEmbeddingModel(model string) CollectionConfigFunc

WithEmbeddingModel 设置embedding模型

func WithEnableAutoUpdateGraphInfos

func WithEnableAutoUpdateGraphInfos(enable bool) CollectionConfigFunc

func WithEnablePQ

func WithEnablePQ(enable bool) CollectionConfigFunc

func WithForceNew

func WithForceNew(i ...bool) CollectionConfigFunc

func WithHNSWParameters

func WithHNSWParameters(m int, ml float64, efSearch, efConstruct int) CollectionConfigFunc

WithHNSWParameters 批量设置HNSW参数

func WithKeyAsUID

func WithKeyAsUID(keyAsUID bool) CollectionConfigFunc

func WithLazyLoadEmbeddingClient

func WithLazyLoadEmbeddingClient() CollectionConfigFunc

func WithMaxChunkSize

func WithMaxChunkSize(maxChunkSize int) CollectionConfigFunc

func WithModelDimension

func WithModelDimension(dimension int) CollectionConfigFunc

WithModelDimension 设置模型维度

func WithModelName

func WithModelName(name string) CollectionConfigFunc

func WithOverlap

func WithOverlap(overlap int) CollectionConfigFunc

func WithPreCacheSize

func WithPreCacheSize(preCacheSize int) CollectionConfigFunc

func WithTryRebuildHNSWIndex

func WithTryRebuildHNSWIndex(tryRebuildHNSWIndex bool) CollectionConfigFunc

type CollectionInfo

// CollectionInfo describes a knowledge base (collection); GetCollectionInfo returns it.
type CollectionInfo struct {
	Name        string
	Description string
	ModelName   string
	Dimension   int

	// NOTE(review): M and Ml presumably correspond to the m and ml arguments of
	// WithHNSWParameters — confirm.
	M                int
	Ml               float64
	EfSearch         int
	EfConstruct      int
	DistanceFuncType string
}

func GetCollectionInfo

func GetCollectionInfo(db *gorm.DB, name string) (*CollectionInfo, error)

GetCollectionInfo 获取知识库信息

type CollectionQueryConfig

// CollectionQueryConfig is the RAG query configuration. It is built from
// CollectionQueryOption values (see NewRAGQueryConfig).
type CollectionQueryConfig struct {
	Ctx                  context.Context
	Limit                int // Result limit for a single sub-query.
	CollectionNumLimit   int
	QueryCollectionNames []string
	CollectionScoreLimit float64
	EnhancePlan          []string // Enabled by default: HyDE, generalized query, split query.
	Filter               func(key string, getDoc func() *Document) bool
	Concurrent           int
	MsgCallBack          func(*RAGSearchResult)
	OnSubQueryStart      func(method string, query string)
	OnQueryFinish        func([]*ScoredResult)
	OnStatus             func(label string, value string)
	OnlyResults          bool // Return only final results; ignore intermediate results and messages.

	// On Stream Reader
	OnLogReader func(reader io.Reader)

	RAGSimilarityThreshold   float64 // RAG similarity limit.
	EveryQueryResultCallback func(result *ScoredResult)
	RAGQueryType             []string

	EnhanceSearchHandler enhancesearch.SearchHandler

	LoadConfig []CollectionConfigFunc
}

CollectionQueryConfig RAG查询配置

func NewRAGQueryConfig

func NewRAGQueryConfig(opts ...CollectionQueryOption) *CollectionQueryConfig

NewRAGQueryConfig 创建新的RAG查询配置

type CollectionQueryOption

// CollectionQueryOption is a RAG query option: a function that receives a *CollectionQueryConfig.
type CollectionQueryOption func(*CollectionQueryConfig)

CollectionQueryOption RAG查询选项

func WithEveryQueryResultCallback

func WithEveryQueryResultCallback(callback func(result *ScoredResult)) CollectionQueryOption

func WithRAGCollectionLimit

func WithRAGCollectionLimit(collectionLimit int) CollectionQueryOption

WithRAGCollectionLimit 设置搜索的集合数量限制

func WithRAGCollectionName

func WithRAGCollectionName(collectionName string) CollectionQueryOption

WithRAGCollectionName 指定搜索的集合名称

func WithRAGCollectionScoreLimit

func WithRAGCollectionScoreLimit(scoreLimit float64) CollectionQueryOption

WithRAGCollectionScoreLimit 设置集合搜索分数阈值

func WithRAGConcurrent

func WithRAGConcurrent(concurrent int) CollectionQueryOption

WithRAGConcurrent 设置并发数

func WithRAGCtx

func WithRAGCtx(ctx context.Context) CollectionQueryOption

WithRAGCtx 设置上下文

func WithRAGDocumentType

func WithRAGDocumentType(documentType ...string) CollectionQueryOption

func WithRAGEnhance

func WithRAGEnhance(enhancePlan ...string) CollectionQueryOption

WithRAGEnhance 设置查询使用的增强搜索计划(enhancePlan,可传入多个计划名称)

func WithRAGEnhanceSearchHandler

func WithRAGEnhanceSearchHandler(handler enhancesearch.SearchHandler) CollectionQueryOption

func WithRAGFilter

func WithRAGFilter(filter func(key string, getDoc func() *Document) bool) CollectionQueryOption

WithRAGFilter 设置文档过滤器

func WithRAGLimit

func WithRAGLimit(limit int) CollectionQueryOption

WithRAGLimit 设置查询结果限制

func WithRAGLogReader

func WithRAGLogReader(f func(reader io.Reader)) CollectionQueryOption

func WithRAGMsgCallBack

func WithRAGMsgCallBack(msgCallBack func(*RAGSearchResult)) CollectionQueryOption

WithRAGMsgCallBack 设置消息回调函数

func WithRAGOnQueryFinish

func WithRAGOnQueryFinish(callback func([]*ScoredResult)) CollectionQueryOption

func WithRAGOnlyResults

func WithRAGOnlyResults(onlyResults bool) CollectionQueryOption

func WithRAGQueryCollectionNames

func WithRAGQueryCollectionNames(collectionNames ...string) CollectionQueryOption

func WithRAGQueryStatus

func WithRAGQueryStatus(i func(label string, i any, tags ...string)) CollectionQueryOption

func WithRAGSimilarityThreshold

func WithRAGSimilarityThreshold(threshold float64) CollectionQueryOption

func WithRAGSystemLoadConfig

func WithRAGSystemLoadConfig(loadConfig ...CollectionConfigFunc) CollectionQueryOption

type Document

// Document represents a document that can be retrieved.
type Document struct {
	ID              string                 `json:"id"`   // Unique document identifier.
	Type            schema.RAGDocumentType `json:"type"` // Document type.
	EntityUUID      string                 `json:"entityUUID"`
	RelatedEntities []string               `json:"relatedEntities"`
	Content         string                 `json:"content"`  // Document content.
	Metadata        schema.MetadataMap     `json:"metadata"` // Document metadata.
	Embedding       []float32              `json:"-"`        // Embedding vector of the document; excluded from JSON serialization.
	RuntimeID       string                 `json:"runtimeID"`
}

Document 表示可以被检索的文档

func BuildDocument

func BuildDocument(docId, content string, opts ...DocumentOption) *Document

func TextToDocuments

func TextToDocuments(text string, maxChunkSize int, overlap int, metadata map[string]any) []Document

TextToDocuments 将文本转换为文档对象

type DocumentOption

// DocumentOption is a functional option that receives a *Document (see BuildDocument).
type DocumentOption func(document *Document)

func WithDocumentEntityID

func WithDocumentEntityID(entityUUID string) DocumentOption

func WithDocumentMetadataKeyValue

func WithDocumentMetadataKeyValue(key string, value any) DocumentOption

func WithDocumentRawMetadata

func WithDocumentRawMetadata(i map[string]any) DocumentOption

func WithDocumentRelatedEntities

func WithDocumentRelatedEntities(uuids ...string) DocumentOption

func WithDocumentRuntimeID

func WithDocumentRuntimeID(runtimeID string) DocumentOption

func WithDocumentType

func WithDocumentType(i schema.RAGDocumentType) DocumentOption

type EmbeddingClient

// EmbeddingClient is the interface for turning text into embedding vectors.
type EmbeddingClient interface {
	Embedding(text string) ([]float32, error)
	// EmbeddingRaw returns the raw embedding result, which may contain multiple vectors.
	EmbeddingRaw(text string) ([][]float32, error)
}

func NewMockEmbedder

func NewMockEmbedder(f func(text string) ([]float32, error)) EmbeddingClient

type EmptyEmbedding

// EmptyEmbedding is a field-less type that provides an Embedding method.
// NOTE(review): what that method returns is not visible here — confirm before relying on it.
type EmptyEmbedding struct{}

func (EmptyEmbedding) Embedding

func (e EmptyEmbedding) Embedding(text string) ([]float32, error)

type ExportVectorStoreDocument

// ExportVectorStoreDocument is the JSON-tagged document record; RAGBinaryData.Documents
// holds a slice of pointers to it.
type ExportVectorStoreDocument struct {
	DocumentID      string                 `json:"document_id"`
	Metadata        map[string]interface{} `json:"metadata"`
	Embedding       []float32              `json:"embedding"`
	PQCode          []byte                 `json:"pq_code"`
	Content         string                 `json:"content"`
	DocumentType    string                 `json:"document_type"`
	EntityID        string                 `json:"entity_id"`
	// NOTE(review): a single string here, whereas Document.RelatedEntities is []string;
	// the encoding used to join the values is not visible — confirm.
	RelatedEntities string                 `json:"related_entities"`
}

type LocalModelEmbedding

// LocalModelEmbedding is an embedding service client backed by a local model.
type LocalModelEmbedding struct {
	// contains filtered or unexported fields
}

LocalModelEmbedding 基于本地模型的嵌入服务客户端

func GetLocalEmbeddingService

func GetLocalEmbeddingService() (*LocalModelEmbedding, error)

GetLocalEmbeddingService 获取本地嵌入服务单例 使用单例模式,确保只有一个 Embedding 服务实例

func NewLocalModelEmbedding

func NewLocalModelEmbedding(model *localmodel.Model, address string) *LocalModelEmbedding

NewLocalModelEmbedding 创建本地模型嵌入客户端

func StartLocalEmbeddingService

func StartLocalEmbeddingService() (*LocalModelEmbedding, error)

StartLocalEmbeddingService 启动本地嵌入服务(已废弃,请使用 GetLocalEmbeddingService 代替)。

Deprecated: Use GetLocalEmbeddingService() instead.

func (*LocalModelEmbedding) Embedding

func (l *LocalModelEmbedding) Embedding(text string) ([]float32, error)

Embedding 实现 EmbeddingClient 接口,生成文本的嵌入向量

func (*LocalModelEmbedding) EmbeddingRaw

func (l *LocalModelEmbedding) EmbeddingRaw(text string) ([][]float32, error)

EmbeddingRaw 实现 EmbeddingClient 接口,返回原始的 embedding 结果

func (*LocalModelEmbedding) GetAddress

func (l *LocalModelEmbedding) GetAddress() string

GetAddress 获取服务地址

func (*LocalModelEmbedding) GetModel

func (l *LocalModelEmbedding) GetModel() *localmodel.Model

GetModel 获取模型配置

type MemoryVectorStore

// MemoryVectorStore is an in-memory vector store implementation; it is suited to
// temporary data and not to large volumes of data.
type MemoryVectorStore struct {
	// contains filtered or unexported fields
}

MemoryVectorStore 是一个基于内存的向量存储实现,适合储存临时数据,不适合储存大量数据

func (*MemoryVectorStore) Add

func (m *MemoryVectorStore) Add(docs ...*Document) error

Add 添加文档到向量存储

func (*MemoryVectorStore) Count

func (m *MemoryVectorStore) Count() (int, error)

Count 返回文档总数

func (*MemoryVectorStore) Delete

func (m *MemoryVectorStore) Delete(ids ...string) error

Delete 根据 ID 删除文档

func (*MemoryVectorStore) FuzzSearch

func (m *MemoryVectorStore) FuzzSearch(ctx context.Context, query string, limit int) (<-chan SearchResult, error)

func (*MemoryVectorStore) Get

func (m *MemoryVectorStore) Get(id string) (*Document, bool, error)

Get 根据 ID 获取文档

func (*MemoryVectorStore) List

func (m *MemoryVectorStore) List() ([]*Document, error)

List 列出所有文档

func (*MemoryVectorStore) Search

func (m *MemoryVectorStore) Search(query string, page, limit int) ([]SearchResult, error)

Search 根据查询文本检索相关文档

func (*MemoryVectorStore) SearchWithFilter

func (m *MemoryVectorStore) SearchWithFilter(query string, page, limit int, filter func(key string, getDoc func() *Document) bool) ([]SearchResult, error)

type MockEmbedder

// MockEmbedder is a mock EmbeddingClient that holds a caller-supplied embedding function.
// NOTE(review): presumably Embedding delegates to MockEmbedderFunc — confirm.
type MockEmbedder struct {
	MockEmbedderFunc func(text string) ([]float32, error)
}

func (*MockEmbedder) Embedding

func (m *MockEmbedder) Embedding(text string) ([]float32, error)

Embedding 模拟实现 EmbeddingClient 接口

func (*MockEmbedder) EmbeddingRaw

func (m *MockEmbedder) EmbeddingRaw(text string) ([][]float32, error)

EmbeddingRaw 返回单个向量的二维数组形式

type MockEmbeddingClient

// MockEmbeddingClient is a mock embedding client with unexported state; it is
// obtained from NewMockEmbedding(vocabulary) or NewDefaultMockEmbedding().
type MockEmbeddingClient struct {
	// contains filtered or unexported fields
}

func NewDefaultMockEmbedding

func NewDefaultMockEmbedding() *MockEmbeddingClient

NewDefaultMockEmbedding 创建一个默认的 MockEmbeddingClient 实例,使用预定义的词典,向量维度为 1024

func NewMockEmbedding

func NewMockEmbedding(vocabulary []string) (*MockEmbeddingClient, error)

func (*MockEmbeddingClient) Embedding

func (c *MockEmbeddingClient) Embedding(text string) ([]float32, error)

func (*MockEmbeddingClient) EmbeddingRaw

func (c *MockEmbeddingClient) EmbeddingRaw(text string) ([][]float32, error)

func (*MockEmbeddingClient) GenerateRandomText

func (c *MockEmbeddingClient) GenerateRandomText(wordCount int) string

GenerateRandomText 从词典中随机选择词汇来生成一段文本。

func (*MockEmbeddingClient) GenerateRandomWord

func (c *MockEmbeddingClient) GenerateRandomWord(wordCount int) []string

func (*MockEmbeddingClient) GenerateSimilarText

func (c *MockEmbeddingClient) GenerateSimilarText(baseText string, threshold float64) (string, error)

GenerateSimilarText 生成一个与基础文本相似度高于或等于阈值的文本。

type NodeOffsetToVectorFunc

// NodeOffsetToVectorFunc maps a uint32 offset to a []float32 vector.
type NodeOffsetToVectorFunc func(offset uint32) []float32

type RAGBinaryData

// RAGBinaryData is a simplified RAG binary data structure (used for import only).
type RAGBinaryData struct {
	Collection *schema.VectorStoreCollection
	Documents  []*ExportVectorStoreDocument
	Version    uint32
}

RAGBinaryData 简化的RAG二进制数据结构(仅用于导入)

func LoadRAGFileHeader

func LoadRAGFileHeader(reader io.Reader) (*RAGBinaryData, error)

func LoadRAGFromBinary

func LoadRAGFromBinary(reader io.Reader) (*RAGBinaryData, error)

LoadRAGFromBinary 从二进制数据流式加载RAG格式

type RAGExportConfig

// RAGExportConfig is the unified configuration for both import and export.
type RAGExportConfig struct {
	Ctx               context.Context
	DB                *gorm.DB // Database (used on import).
	NoHNSWIndex       bool     // Whether to leave out the HNSW index (used on export).
	OnlyPQCode        bool     // Whether to export only the PQ codes (used on export).
	NoMetadata        bool     // Whether to skip exporting metadata (used on export).
	OverwriteExisting bool     // Whether to overwrite existing data (used on import).
	NoOriginInput     bool     // Whether to skip exporting the original input data (used on export).
	RebuildHNSWIndex  bool     // Whether to rebuild the HNSW index (used on import).

	CollectionName    string // Collection name to use (used on import, optional).
	DocumentHandler   func(doc schema.VectorStoreDocument) (schema.VectorStoreDocument, error)
	OnProgressHandler func(percent float64, message string, messageType string) // Progress callback.

	SerialVersionUID string // Serialization version identifier (used on import).
	RAGID            string // RAG ID (used on import).
}

RAGExportConfig 导入导出统一配置

func NewRAGConfig

func NewRAGConfig(opts ...RAGExportOptionFunc) *RAGExportConfig

type RAGExportOptionFunc

// RAGExportOptionFunc is a functional option that receives a *RAGExportConfig (see NewRAGConfig).
type RAGExportOptionFunc func(*RAGExportConfig)

func WithCollectionName

func WithCollectionName(name string) RAGExportOptionFunc

func WithContext

func WithContext(ctx context.Context) RAGExportOptionFunc

通用选项

func WithImportExportDB

func WithImportExportDB(db *gorm.DB) RAGExportOptionFunc

func WithNoHNSWGraph

func WithNoHNSWGraph(b bool) RAGExportOptionFunc

func WithNoMetadata

func WithNoMetadata(b bool) RAGExportOptionFunc

RAG 配置选项

func WithNoOriginInput

func WithNoOriginInput(b bool) RAGExportOptionFunc

func WithOnlyPQCode

func WithOnlyPQCode(b bool) RAGExportOptionFunc

func WithOverwriteExisting

func WithOverwriteExisting(b bool) RAGExportOptionFunc

func WithProgressHandler

func WithProgressHandler(handler func(percent float64, message string, messageType string)) RAGExportOptionFunc

func WithRAGID

func WithRAGID(ragID string) RAGExportOptionFunc

func WithRebuildHNSWIndex

func WithRebuildHNSWIndex(b bool) RAGExportOptionFunc

func WithSerialVersionUID

func WithSerialVersionUID(version string) RAGExportOptionFunc

type RAGSearchResult

// RAGSearchResult is a RAG search result.
type RAGSearchResult struct {
	Message     string      `json:"message"`
	Data        interface{} `json:"data"`
	Type        string      `json:"type"`      // message, mid_result, result
	Score       float64     `json:"score"`     // Similarity score.
	Source      string      `json:"source"`    // Result source (collection name).
	Timestamp   int64       `json:"timestamp"` // Timestamp.
	QueryMethod string      `json:"query_method"`
	QueryOrigin string      `json:"query_origin"`
	Index       int64       `json:"index"`
}

RAGSearchResult RAG搜索结果

func (RAGSearchResult) GetContent

func (R RAGSearchResult) GetContent() string

func (RAGSearchResult) GetScore

func (R RAGSearchResult) GetScore() float64

func (RAGSearchResult) GetSource

func (R RAGSearchResult) GetSource() string

type SQLiteVectorStoreHNSW

// SQLiteVectorStoreHNSW is a SQLite-based vector store implementation.
type SQLiteVectorStoreHNSW struct {
	EnableAutoUpdateGraphInfos bool

	UIDType string
	// contains filtered or unexported fields
}

SQLiteVectorStoreHNSW 是一个基于 SQLite 的向量存储实现

func BuildVectorIndexForKnowledgeBase

func BuildVectorIndexForKnowledgeBase(db *gorm.DB, id int64, opts ...CollectionConfigFunc) (*SQLiteVectorStoreHNSW, error)

BuildVectorIndexForKnowledgeBase 构建向量索引

func BuildVectorIndexForKnowledgeBaseEntry

func BuildVectorIndexForKnowledgeBaseEntry(db *gorm.DB, knowledgeBaseId int64, id string, opts ...CollectionConfigFunc) (*SQLiteVectorStoreHNSW, error)

func CreateCollection

func CreateCollection(db *gorm.DB, name string, description string, opts ...CollectionConfigFunc) (*SQLiteVectorStoreHNSW, error)

func GetCollection

func GetCollection(db *gorm.DB, collectionName string, opts ...CollectionConfigFunc) (*SQLiteVectorStoreHNSW, error)

func LoadCollection

func LoadCollection(db *gorm.DB, name string, opts ...CollectionConfigFunc) (*SQLiteVectorStoreHNSW, error)

func LoadSQLiteVectorStoreHNSW

func LoadSQLiteVectorStoreHNSW(db *gorm.DB, collectionName string, opts ...CollectionConfigFunc) (*SQLiteVectorStoreHNSW, error)

func NewMemoryVectorStore

func NewMemoryVectorStore(embedder EmbeddingClient, opts ...CollectionConfigFunc) *SQLiteVectorStoreHNSW

NewMemoryVectorStore 创建一个新的内存向量存储

func NewSQLiteVectorStoreHNSW

func NewSQLiteVectorStoreHNSW(name string, description string, modelName string, dimension int, embedder EmbeddingClient, db *gorm.DB, options ...CollectionConfigFunc) (*SQLiteVectorStoreHNSW, error)

NewSQLiteVectorStoreHNSW 创建一个新的 SQLite 向量存储

func NewSQLiteVectorStoreHNSWEx

func NewSQLiteVectorStoreHNSWEx(db *gorm.DB, name string, description string, opts ...CollectionConfigFunc) (*SQLiteVectorStoreHNSW, error)

func (*SQLiteVectorStoreHNSW) Add

func (s *SQLiteVectorStoreHNSW) Add(docs ...*Document) error

Add 添加文档到向量存储

func (*SQLiteVectorStoreHNSW) AddWithOptions

func (s *SQLiteVectorStoreHNSW) AddWithOptions(docId, content string, opts ...DocumentOption) error

func (*SQLiteVectorStoreHNSW) Clear

func (s *SQLiteVectorStoreHNSW) Clear() error

func (*SQLiteVectorStoreHNSW) ConvertToPQMode

func (s *SQLiteVectorStoreHNSW) ConvertToPQMode() error

func (*SQLiteVectorStoreHNSW) ConvertToStandardMode

func (s *SQLiteVectorStoreHNSW) ConvertToStandardMode() error

func (*SQLiteVectorStoreHNSW) Count

func (s *SQLiteVectorStoreHNSW) Count() (int, error)

Count 返回文档总数

func (*SQLiteVectorStoreHNSW) Delete

func (s *SQLiteVectorStoreHNSW) Delete(ids ...string) error

Delete 根据 ID 删除文档

func (*SQLiteVectorStoreHNSW) DeleteEmbeddingData

func (s *SQLiteVectorStoreHNSW) DeleteEmbeddingData() error

DeleteEmbeddingData 删除嵌入数据

func (*SQLiteVectorStoreHNSW) FuzzRawSearch

func (r *SQLiteVectorStoreHNSW) FuzzRawSearch(ctx context.Context, keywords string, limit int) (<-chan SearchResult, error)

FuzzRawSearch Sql 文本模糊搜索(非语义)

func (*SQLiteVectorStoreHNSW) FuzzSearch

func (s *SQLiteVectorStoreHNSW) FuzzSearch(ctx context.Context, query string, limit int) (<-chan SearchResult, error)

func (*SQLiteVectorStoreHNSW) Get

Get 根据 ID 获取文档

func (*SQLiteVectorStoreHNSW) GetArchived

func (s *SQLiteVectorStoreHNSW) GetArchived() bool

func (*SQLiteVectorStoreHNSW) GetCollectionInfo

func (s *SQLiteVectorStoreHNSW) GetCollectionInfo() *schema.VectorStoreCollection

func (*SQLiteVectorStoreHNSW) GetEmbedder

func (s *SQLiteVectorStoreHNSW) GetEmbedder() EmbeddingClient

func (*SQLiteVectorStoreHNSW) GetName

func (s *SQLiteVectorStoreHNSW) GetName() string

func (*SQLiteVectorStoreHNSW) Has

func (s *SQLiteVectorStoreHNSW) Has(docId string) bool

func (*SQLiteVectorStoreHNSW) List

func (s *SQLiteVectorStoreHNSW) List() ([]*Document, error)

List 列出所有文档

func (*SQLiteVectorStoreHNSW) LogPerformanceDiagnostics

func (s *SQLiteVectorStoreHNSW) LogPerformanceDiagnostics()

func (*SQLiteVectorStoreHNSW) PerformanceDiagnostics

func (s *SQLiteVectorStoreHNSW) PerformanceDiagnostics() map[string]interface{}

func (*SQLiteVectorStoreHNSW) Query

func (r *SQLiteVectorStoreHNSW) Query(query string, topN int, limits ...float64) ([]SearchResult, error)

Query is short for QueryTopN

func (*SQLiteVectorStoreHNSW) QueryTopN

func (r *SQLiteVectorStoreHNSW) QueryTopN(query string, topN int, limits ...float64) ([]SearchResult, error)

QueryTopN 根据查询文本检索相关文档并返回结果

func (*SQLiteVectorStoreHNSW) QueryWithFilter

func (r *SQLiteVectorStoreHNSW) QueryWithFilter(query string, page, limit int, filter func(key string, getDoc func() *Document) bool) ([]SearchResult, error)

func (*SQLiteVectorStoreHNSW) QueryWithPage

func (r *SQLiteVectorStoreHNSW) QueryWithPage(query string, page, limit int) ([]SearchResult, error)

QueryWithPage 根据查询文本检索相关文档并返回结果

func (*SQLiteVectorStoreHNSW) Remove

func (s *SQLiteVectorStoreHNSW) Remove() error

func (*SQLiteVectorStoreHNSW) Search

func (s *SQLiteVectorStoreHNSW) Search(query string, page, limit int) ([]SearchResult, error)

Search 根据查询文本检索相关文档

func (*SQLiteVectorStoreHNSW) SearchWithFilter

func (s *SQLiteVectorStoreHNSW) SearchWithFilter(query string, page, limit int, filter func(key string, getDoc func() *Document) bool) ([]SearchResult, error)

SearchWithFilter 根据查询文本检索相关文档,并根据过滤函数过滤结果

func (*SQLiteVectorStoreHNSW) SetArchived

func (s *SQLiteVectorStoreHNSW) SetArchived(archived bool) error

func (*SQLiteVectorStoreHNSW) UnSafeCount

func (s *SQLiteVectorStoreHNSW) UnSafeCount() (int, error)

func (*SQLiteVectorStoreHNSW) UpdateAutoUpdateGraphInfos

func (s *SQLiteVectorStoreHNSW) UpdateAutoUpdateGraphInfos() error

type ScoredResult

// ScoredResult carries a Document together with a Score, a Source, an Index and
// QueryMethod/QueryOrigin strings.
// NOTE(review): field meanings presumably mirror the same-named fields of RAGSearchResult — confirm.
type ScoredResult struct {
	Index       int64
	QueryMethod string
	QueryOrigin string
	Document    *Document
	Score       float64
	Source      string
}

func (*ScoredResult) GetContent

func (s *ScoredResult) GetContent() string

func (*ScoredResult) GetScore

func (s *ScoredResult) GetScore() float64

func (*ScoredResult) GetScoreMethod

func (s *ScoredResult) GetScoreMethod() string

func (*ScoredResult) GetSource

func (s *ScoredResult) GetSource() string

func (*ScoredResult) GetTitle

func (s *ScoredResult) GetTitle() string

func (*ScoredResult) GetType

func (s *ScoredResult) GetType() string

func (*ScoredResult) GetUUID

func (s *ScoredResult) GetUUID() string

type SearchResult

// SearchResult represents a retrieval result.
type SearchResult struct {
	Document *Document `json:"document"` // Retrieved document.
	Score    float64   `json:"score"`    // Similarity score (between -1 and 1).
}

SearchResult 表示检索结果

func FilterResults

func FilterResults(results []SearchResult, threshold float64) []SearchResult

FilterResults 根据相似度阈值过滤搜索结果

func SimpleQuery

func SimpleQuery(db *gorm.DB, query string, limit int, opts ...CollectionQueryOption) ([]*SearchResult, error)

SimpleQuery 简化的RAG查询接口,直接返回结果

type SimpleERMAnalysisResult

// SimpleERMAnalysisResult is a simplified ERM analysis result struct, kept here to
// avoid an import cycle.
type SimpleERMAnalysisResult struct {
	Entities      []*schema.ERModelEntity `json:"entities"`
	Relationships []*SimpleRelationship   `json:"relationships"`
	OriginalData  []byte                  `json:"original_data"`
}

SimpleERMAnalysisResult 简化的 ERM 分析结果结构体,避免导入循环

func (*SimpleERMAnalysisResult) GenerateDotGraph

func (e *SimpleERMAnalysisResult) GenerateDotGraph() *dot.Graph

GenerateDotGraph 生成 Dot 图 (默认从上到下布局)

func (*SimpleERMAnalysisResult) GenerateDotGraphWithDirection

func (e *SimpleERMAnalysisResult) GenerateDotGraphWithDirection(direction string) *dot.Graph

GenerateDotGraphWithDirection 生成指定方向的 Dot 图。支持的方向:
- "TB": 从上到下 (Top to Bottom)
- "BT": 从下到上 (Bottom to Top)
- "LR": 从左到右 (Left to Right)
- "RL": 从右到左 (Right to Left)

type SimpleRelationship

// SimpleRelationship is a simplified relationship struct; SimpleERMAnalysisResult.Relationships
// holds a slice of pointers to it.
type SimpleRelationship struct {
	SourceTemporaryName     string `json:"source_temporary_name"`
	TargetTemporaryName     string `json:"target_temporary_name"`
	RelationshipType        string `json:"relationship_type"`
	RelationshipTypeVerbose string `json:"relationship_type_verbose"`
	DecorationAttributes    string `json:"decoration_attributes"`
}

SimpleRelationship 简化的关系结构体

type VectorStore

// VectorStore defines the basic operations of a vector store.
type VectorStore interface {
	// Add adds documents to the vector store.
	Add(docs ...*Document) error

	// Search retrieves documents relevant to the query text.
	Search(query string, page, limit int) ([]SearchResult, error)

	// SearchWithFilter takes the same arguments as Search plus a filter callback.
	// NOTE(review): whether a true return keeps or drops a document is not visible here — confirm.
	SearchWithFilter(query string, page, limit int, filter func(key string, getDoc func() *Document) bool) ([]SearchResult, error)

	// FuzzSearch is a non-semantic fuzzy search.
	FuzzSearch(ctx context.Context, query string, limit int) (<-chan SearchResult, error)

	// Delete removes documents by ID.
	Delete(ids ...string) error

	// Get fetches a document by ID.
	Get(id string) (*Document, bool, error)

	// List lists all documents.
	List() ([]*Document, error)

	// Count returns the total number of documents.
	Count() (int, error)
}

VectorStore 接口定义了向量存储的基本操作

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL