Documentation
¶
Index ¶
- Constants
- Variables
- func ComputeHash(record *Record) string
- type AliyunConfig
- type AliyunHandler
- func (ah *AliyunHandler) CreateNamespace(ctx context.Context, name string) error
- func (ah *AliyunHandler) Delete(ctx context.Context, ids []string, opts *DeleteOptions) error
- func (ah *AliyunHandler) DeleteNamespace(ctx context.Context, name string) error
- func (ah *AliyunHandler) Get(ctx context.Context, ids []string, opts *GetOptions) ([]Record, error)
- func (ah *AliyunHandler) List(ctx context.Context, opts *ListOptions) (*ListResult, error)
- func (ah *AliyunHandler) ListNamespaces(ctx context.Context) ([]string, error)
- func (ah *AliyunHandler) Ping(ctx context.Context) error
- func (ah *AliyunHandler) Provider() string
- func (ah *AliyunHandler) Query(ctx context.Context, text string, opts *QueryOptions) ([]QueryResult, error)
- func (ah *AliyunHandler) Upsert(ctx context.Context, records []Record, opts *UpsertOptions) error
- type BackupConfig
- type CacheEntry
- type Chunk
- type ChunkOptions
- type Chunker
- type DeleteOptions
- type DocumentHash
- type DocumentType
- type DocumentTypeDetector
- type ExportData
- type ExportFormat
- type ExportMetadata
- type Filter
- type FilterOp
- type GetOptions
- type HandlerFactoryParams
- type IncrementalUpdater
- func (iu *IncrementalUpdater) Clear()
- func (iu *IncrementalUpdater) GetChanges(records []Record) ([]Record, []string)
- func (iu *IncrementalUpdater) GetHashes() map[string]string
- func (iu *IncrementalUpdater) GetSyncStats(records []Record) SyncStats
- func (iu *IncrementalUpdater) LastSyncTime() time.Time
- func (iu *IncrementalUpdater) UpdateHashes(records []Record)
- type KnowledgeBase
- func (kb *KnowledgeBase) AddDocument(ctx context.Context, docID, title, content string, metadata map[string]any) error
- func (kb *KnowledgeBase) Backup(ctx context.Context, cfg BackupConfig) (string, error)
- func (kb *KnowledgeBase) CacheStats() map[string]any
- func (kb *KnowledgeBase) ClearCache()
- func (kb *KnowledgeBase) Close() error
- func (kb *KnowledgeBase) DeleteDocument(ctx context.Context, docID string) error
- func (kb *KnowledgeBase) ExportToFile(ctx context.Context, filepath string, format ExportFormat) error
- func (kb *KnowledgeBase) ExportToWriter(ctx context.Context, w io.Writer, format ExportFormat) error
- func (kb *KnowledgeBase) Health(ctx context.Context) error
- func (kb *KnowledgeBase) ImportFromFile(ctx context.Context, filepath string, format ExportFormat) error
- func (kb *KnowledgeBase) ImportFromReader(ctx context.Context, r io.Reader, format ExportFormat) error
- func (kb *KnowledgeBase) IncrementalAddDocuments(ctx context.Context, updater *IncrementalUpdater, records []Record) error
- func (kb *KnowledgeBase) Query(ctx context.Context, query string, topK int) ([]QueryResult, error)
- func (kb *KnowledgeBase) Restore(ctx context.Context, filepath string, format ExportFormat) error
- type KnowledgeBaseConfig
- type KnowledgeHandler
- type ListOptions
- type ListResult
- type MilvusConfig
- type MilvusHandler
- func (h *MilvusHandler) CreateNamespace(ctx context.Context, name string) error
- func (h *MilvusHandler) Delete(ctx context.Context, ids []string, opts *DeleteOptions) error
- func (h *MilvusHandler) DeleteNamespace(ctx context.Context, name string) error
- func (h *MilvusHandler) Get(ctx context.Context, ids []string, opts *GetOptions) ([]Record, error)
- func (h *MilvusHandler) List(ctx context.Context, opts *ListOptions) (*ListResult, error)
- func (h *MilvusHandler) ListNamespaces(ctx context.Context) ([]string, error)
- func (h *MilvusHandler) Ping(ctx context.Context) error
- func (h *MilvusHandler) Provider() string
- func (h *MilvusHandler) Query(ctx context.Context, text string, opts *QueryOptions) ([]QueryResult, error)
- func (h *MilvusHandler) Upsert(ctx context.Context, records []Record, opts *UpsertOptions) error
- type QdrantConfig
- type QdrantHandler
- func (qh *QdrantHandler) CreateNamespace(ctx context.Context, name string) error
- func (qh *QdrantHandler) Delete(ctx context.Context, ids []string, opts *DeleteOptions) error
- func (qh *QdrantHandler) DeleteNamespace(ctx context.Context, name string) error
- func (qh *QdrantHandler) Get(ctx context.Context, ids []string, opts *GetOptions) ([]Record, error)
- func (qh *QdrantHandler) List(ctx context.Context, opts *ListOptions) (*ListResult, error)
- func (qh *QdrantHandler) ListNamespaces(ctx context.Context) ([]string, error)
- func (qh *QdrantHandler) Ping(ctx context.Context) error
- func (qh *QdrantHandler) Provider() string
- func (qh *QdrantHandler) Query(ctx context.Context, text string, opts *QueryOptions) ([]QueryResult, error)
- func (qh *QdrantHandler) Upsert(ctx context.Context, records []Record, opts *UpsertOptions) error
- type QueryCache
- type QueryOptions
- type QueryResult
- type RAGFlowConfig
- type RAGFlowHandler
- func (rh *RAGFlowHandler) CreateNamespace(ctx context.Context, name string) error
- func (rh *RAGFlowHandler) Delete(ctx context.Context, ids []string, opts *DeleteOptions) error
- func (rh *RAGFlowHandler) DeleteNamespace(ctx context.Context, name string) error
- func (rh *RAGFlowHandler) Get(ctx context.Context, ids []string, opts *GetOptions) ([]Record, error)
- func (rh *RAGFlowHandler) List(ctx context.Context, opts *ListOptions) (*ListResult, error)
- func (rh *RAGFlowHandler) ListNamespaces(ctx context.Context) ([]string, error)
- func (rh *RAGFlowHandler) Ping(ctx context.Context) error
- func (rh *RAGFlowHandler) Provider() string
- func (rh *RAGFlowHandler) Query(ctx context.Context, text string, opts *QueryOptions) ([]QueryResult, error)
- func (rh *RAGFlowHandler) Upsert(ctx context.Context, records []Record, opts *UpsertOptions) error
- type Record
- type RuleBasedDocumentTypeDetector
- type SyncStats
- type UpsertOptions
- type VectorCache
Constants ¶
const (
// ProviderQdrant identifies the Qdrant vector database.
ProviderQdrant = "qdrant"
// ProviderMilvus identifies the Milvus vector database.
ProviderMilvus = "milvus"
// ProviderRAGFlow identifies the RAGFlow RAG engine.
ProviderRAGFlow = "ragflow"
// ProviderAliyun identifies the Alibaba Bailian knowledge base.
ProviderAliyun = "aliyun"
)
Variables ¶
var (
ErrHandlerNotFound = errors.New("handler not be null")
ErrBaseURL = errors.New("BaseURL is required")
ErrCollectionNotFound = errors.New("Collection is required")
ErrRecordNotFound = errors.New("record not found")
ErrNamespaceNotFound = errors.New("namespace not found")
ErrInvalidVectorDimension = errors.New("invalid vector dimension")
ErrEmptyQuery = errors.New("empty query text")
ErrEmptyText = errors.New("empty text")
ErrInvalidChunkOpt = errors.New("invalid chunk options")
ErrNoChunks = errors.New("no chunks generated")
ErrChunkerNotFound = errors.New("no suitable chunker for document type")
)
Functions ¶
func ComputeHash ¶
ComputeHash computes the hash of a document
Types ¶
type AliyunConfig ¶
type AliyunConfig struct {
AccessKeyID string
AccessKeySecret string
Endpoint string
WorkspaceID string
CategoryID string
Timeout time.Duration
}
AliyunConfig configuration for Alibaba Bailian provider
type AliyunHandler ¶
type AliyunHandler struct {
WorkspaceID string
CategoryID string
Embedder embedder.Embedder
// contains filtered or unexported fields
}
AliyunHandler implements KnowledgeHandler using Alibaba Bailian. Alibaba Bailian is a cloud-based RAG service with document management, indexing, and semantic search capabilities.
Note: Alibaba Bailian does not have a namespace concept. Instead, it uses:
- Workspace: the top-level organization unit
- Index: the knowledge base unit (equivalent to a namespace in other systems)
Each Index is an independent knowledge base within a Workspace.
func (*AliyunHandler) CreateNamespace ¶
func (ah *AliyunHandler) CreateNamespace(ctx context.Context, name string) error
CreateNamespace creates a new Index (knowledge base) in Alibaba Bailian. Note: In Alibaba Bailian, a namespace maps to an Index, which is the knowledge base unit. Creating an empty index requires using the CreateIndex API.
func (*AliyunHandler) Delete ¶
func (ah *AliyunHandler) Delete(ctx context.Context, ids []string, opts *DeleteOptions) error
Delete removes records by IDs. Note: Alibaba Bailian doesn't support deleting individual documents. This method returns an error as per API limitations.
func (*AliyunHandler) DeleteNamespace ¶
func (ah *AliyunHandler) DeleteNamespace(ctx context.Context, name string) error
DeleteNamespace deletes an Index (knowledge base) from Alibaba Bailian. Note: In Alibaba Bailian, a namespace maps to an Index, which is the knowledge base unit. This operation deletes the entire index and all its documents.
func (*AliyunHandler) Get ¶
func (ah *AliyunHandler) Get(ctx context.Context, ids []string, opts *GetOptions) ([]Record, error)
Get retrieves records by IDs using semantic search. Note: Alibaba Bailian doesn't support direct document retrieval by ID. This implementation searches for documents with matching IDs.
func (*AliyunHandler) List ¶
func (ah *AliyunHandler) List(ctx context.Context, opts *ListOptions) (*ListResult, error)
List lists all records in a namespace using search. Note: Alibaba Bailian doesn't have a native list API. This implementation returns empty results as pagination is not supported.
func (*AliyunHandler) ListNamespaces ¶
func (ah *AliyunHandler) ListNamespaces(ctx context.Context) ([]string, error)
ListNamespaces lists all Indexes (knowledge bases) in Alibaba Bailian. Note: In Alibaba Bailian, each Index is an independent knowledge base. This implementation returns an empty list as the API doesn't provide a list operation. Users should manage indexes through the Alibaba Bailian console.
func (*AliyunHandler) Ping ¶
func (ah *AliyunHandler) Ping(ctx context.Context) error
Ping checks the health of Alibaba Bailian service
func (*AliyunHandler) Provider ¶
func (ah *AliyunHandler) Provider() string
func (*AliyunHandler) Query ¶
func (ah *AliyunHandler) Query(ctx context.Context, text string, opts *QueryOptions) ([]QueryResult, error)
Query searches documents in the Alibaba Bailian knowledge base. Note: In Alibaba Bailian, the Namespace parameter maps to the Index ID (knowledge base ID). Supports the EnableReranking and ReturnMetadata options.
func (*AliyunHandler) Upsert ¶
func (ah *AliyunHandler) Upsert(ctx context.Context, records []Record, opts *UpsertOptions) error
Upsert adds or updates records in the Alibaba Bailian knowledge base. Note: In Alibaba Bailian, the Namespace parameter maps to the Index ID (knowledge base ID). Note: This implementation validates records, but document upload requires using the Data Center API. For now, this is a validation-only implementation.
type BackupConfig ¶
type BackupConfig struct {
FilePath string
Format ExportFormat
Timestamp bool // Add timestamp to filename
}
BackupConfig represents backup configuration
type CacheEntry ¶
type CacheEntry struct {
Results []QueryResult
Timestamp time.Time
}
CacheEntry represents a cached query result
type ChunkOptions ¶
type ChunkOptions struct {
// MaxChars is the target maximum characters per chunk. When 0, chunkers use their own defaults.
MaxChars int
// OverlapChars is the overlap size between consecutive chunks.
// If set to -1, chunkers may disable overlap.
OverlapChars int
// MinChars is a lower bound; very small chunks may be dropped/merged.
MinChars int
DocumentTitle string
// PreChunkClean is passed to base.CleanText before an LLM call.
// If nil, some implementations will enable StripMarkdown and DedupLines by default.
PreChunkClean *utils.Options
}
ChunkOptions controls chunk size, overlap and optional title metadata.
type Chunker ¶
type Chunker interface {
Provider() string
Chunk(ctx context.Context, text string, opts *ChunkOptions) ([]Chunk, error)
}
Chunker splits long text into chunks (implementations may use deterministic rules or an LLM).
type DeleteOptions ¶
type DeleteOptions struct {
Namespace string
}
type DocumentHash ¶
DocumentHash represents a document's hash for change detection
type DocumentType ¶
type DocumentType int
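const (
DocumentTypeUnknown DocumentType = iota
DocumentTypeStructured // has headings, sections, and paragraphs (manuals, papers, markdown)
DocumentTypeTableKV // tables, key-value pairs, forms, resumes
DocumentTypeUnstructured // messy text, OCR output, no punctuation or paragraphs (requires an LLM)
)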
type DocumentTypeDetector ¶
type DocumentTypeDetector interface {
DetectDocumentType(ctx context.Context, text string) (DocumentType, error)
}
DocumentTypeDetector decides which chunking strategy should be used for a document.
type ExportData ¶
type ExportData struct {
Metadata ExportMetadata `json:"metadata"`
Records []Record `json:"records"`
}
ExportData represents the complete export data
type ExportFormat ¶
type ExportFormat string
ExportFormat defines the export format
const (
ExportFormatJSON ExportFormat = "json"
ExportFormatJSONL ExportFormat = "jsonl"
)
type ExportMetadata ¶
type ExportMetadata struct {
ExportedAt time.Time `json:"exported_at"`
TotalRecords int `json:"total_records"`
Handler string `json:"handler"`
Namespace string `json:"namespace"`
Version string `json:"version"`
}
ExportMetadata contains metadata about the export
type FilterOp ¶
type FilterOp string
const (
FilterOpEqual FilterOp = "$eq"
FilterOpNotEqual FilterOp = "$ne"
FilterOpIn FilterOp = "$in"
FilterOpNotIn FilterOp = "$nin"
FilterOpGt FilterOp = "$gt"
FilterOpGte FilterOp = "$gte"
FilterOpLt FilterOp = "$lt"
FilterOpLte FilterOp = "$lte"
FilterOpContainsAll FilterOp = "$all"
FilterOpContainsAny FilterOp = "$any"
)
type GetOptions ¶
type GetOptions struct {
Namespace string
}
type HandlerFactoryParams ¶
type HandlerFactoryParams struct {
// Provider is ProviderQdrant, ProviderMilvus, ProviderRAGFlow, or ProviderAliyun (see constants in this package).
Provider string
// Namespace is the Qdrant / Milvus collection name, RAGFlow dataset name, or Alibaba Bailian index name.
Namespace string
// QdrantConfig is required when Provider is ProviderQdrant
QdrantConfig *QdrantConfig
// MilvusConfig is required when Provider is ProviderMilvus
MilvusConfig *MilvusConfig
// RAGFlowConfig is required when Provider is ProviderRAGFlow
RAGFlowConfig *RAGFlowConfig
// AliyunConfig is required when Provider is ProviderAliyun
AliyunConfig *AliyunConfig
}
HandlerFactoryParams selects and configures a KnowledgeHandler.
type IncrementalUpdater ¶
type IncrementalUpdater struct {
// contains filtered or unexported fields
}
IncrementalUpdater manages incremental document updates
func NewIncrementalUpdater ¶
func NewIncrementalUpdater() *IncrementalUpdater
NewIncrementalUpdater creates a new incremental updater
func (*IncrementalUpdater) Clear ¶
func (iu *IncrementalUpdater) Clear()
Clear clears all stored hashes
func (*IncrementalUpdater) GetChanges ¶
func (iu *IncrementalUpdater) GetChanges(records []Record) ([]Record, []string)
GetChanges detects which documents have changed
func (*IncrementalUpdater) GetHashes ¶
func (iu *IncrementalUpdater) GetHashes() map[string]string
GetHashes returns all stored hashes
func (*IncrementalUpdater) GetSyncStats ¶
func (iu *IncrementalUpdater) GetSyncStats(records []Record) SyncStats
GetSyncStats returns synchronization statistics
func (*IncrementalUpdater) LastSyncTime ¶
func (iu *IncrementalUpdater) LastSyncTime() time.Time
LastSyncTime returns the last sync time
func (*IncrementalUpdater) UpdateHashes ¶
func (iu *IncrementalUpdater) UpdateHashes(records []Record)
UpdateHashes updates the stored hashes
type KnowledgeBase ¶
type KnowledgeBase struct {
// contains filtered or unexported fields
}
KnowledgeBase integrates an embedder, a full-text search engine, a retriever, and a vector database handler to provide a complete knowledge management solution.
func NewKnowledgeBase ¶
func NewKnowledgeBase(cfg KnowledgeBaseConfig) (*KnowledgeBase, error)
NewKnowledgeBase creates a new knowledge base instance
func (*KnowledgeBase) AddDocument ¶
func (kb *KnowledgeBase) AddDocument(ctx context.Context, docID, title, content string, metadata map[string]any) error
AddDocument adds a document to the knowledge base. It chunks the document, generates embeddings, and stores the result in both the vector and search engines.
func (*KnowledgeBase) Backup ¶
func (kb *KnowledgeBase) Backup(ctx context.Context, cfg BackupConfig) (string, error)
Backup creates a backup of the knowledge base
func (*KnowledgeBase) CacheStats ¶
func (kb *KnowledgeBase) CacheStats() map[string]any
CacheStats returns cache statistics
func (*KnowledgeBase) ClearCache ¶
func (kb *KnowledgeBase) ClearCache()
ClearCache clears all cached data
func (*KnowledgeBase) DeleteDocument ¶
func (kb *KnowledgeBase) DeleteDocument(ctx context.Context, docID string) error
DeleteDocument removes a document from the knowledge base
func (*KnowledgeBase) ExportToFile ¶
func (kb *KnowledgeBase) ExportToFile(ctx context.Context, filepath string, format ExportFormat) error
ExportToFile exports knowledge base to a file
func (*KnowledgeBase) ExportToWriter ¶
func (kb *KnowledgeBase) ExportToWriter(ctx context.Context, w io.Writer, format ExportFormat) error
ExportToWriter exports knowledge base to a writer
func (*KnowledgeBase) Health ¶
func (kb *KnowledgeBase) Health(ctx context.Context) error
Health checks the health of all components
func (*KnowledgeBase) ImportFromFile ¶
func (kb *KnowledgeBase) ImportFromFile(ctx context.Context, filepath string, format ExportFormat) error
ImportFromFile imports knowledge base from a file
func (*KnowledgeBase) ImportFromReader ¶
func (kb *KnowledgeBase) ImportFromReader(ctx context.Context, r io.Reader, format ExportFormat) error
ImportFromReader imports knowledge base from a reader
func (*KnowledgeBase) IncrementalAddDocuments ¶
func (kb *KnowledgeBase) IncrementalAddDocuments(ctx context.Context, updater *IncrementalUpdater, records []Record) error
IncrementalAddDocuments adds documents with change detection
func (*KnowledgeBase) Query ¶
func (kb *KnowledgeBase) Query(ctx context.Context, query string, topK int) ([]QueryResult, error)
Query searches the knowledge base using hybrid retrieval
func (*KnowledgeBase) Restore ¶
func (kb *KnowledgeBase) Restore(ctx context.Context, filepath string, format ExportFormat) error
Restore restores the knowledge base from a backup
type KnowledgeBaseConfig ¶
type KnowledgeBaseConfig struct {
// Vector database handler
Handler KnowledgeHandler
// Text embedder for semantic search
Embedder embedder.Embedder
// Full-text search engine
Searcher search.Engine
// Multi-strategy retriever
Retriever retrieve.StrategyRetriever
// Document type detector
Detector DocumentTypeDetector
// Chunkers for different document types
Chunkers map[DocumentType]Chunker
// Default namespace for queries (optional)
Namespace string
// Enable query result caching (optional, default: true)
EnableCache bool
// Query cache size (optional, default: 1000)
QueryCacheSize int
// Vector cache size (optional, default: 10000)
VectorCacheSize int
}
KnowledgeBaseConfig configuration for KnowledgeBase
type KnowledgeHandler ¶
type KnowledgeHandler interface {
Provider() string
// Upsert writes or updates records
Upsert(ctx context.Context, records []Record, opts *UpsertOptions) error
// Query searches by text
Query(ctx context.Context, text string, opts *QueryOptions) ([]QueryResult, error)
// Get get by id
Get(ctx context.Context, ids []string, opts *GetOptions) ([]Record, error)
// List list query for page
List(ctx context.Context, opts *ListOptions) (*ListResult, error)
// Delete delete file document
Delete(ctx context.Context, ids []string, opts *DeleteOptions) error
// Ping health check
Ping(ctx context.Context) error
// CreateNamespace create new namespace
CreateNamespace(ctx context.Context, name string) error
// DeleteNamespace deletes a namespace
DeleteNamespace(ctx context.Context, name string) error
// ListNamespaces List database namespace
ListNamespaces(ctx context.Context) ([]string, error)
}
KnowledgeHandler is the abstract interface implemented by knowledge backends.
func NewKnowledgeHandler ¶
func NewKnowledgeHandler(p HandlerFactoryParams) (KnowledgeHandler, error)
NewKnowledgeHandler returns a backend implementation for the given namespace configuration.
type ListOptions ¶
type ListResult ¶
type MilvusConfig ¶
type MilvusConfig struct {
Address string
Username string
Password string
Token string
DBName string
}
MilvusConfig configuration for Milvus provider
type MilvusHandler ¶
type MilvusHandler struct {
Address string
Username string
Password string
Token string
DBName string
Embedder embedder.Embedder
// contains filtered or unexported fields
}
MilvusHandler implements KnowledgeHandler using Milvus.
- id (VarChar primary key)
- vector (FloatVector)
- content/title/source/tags/metadata_json (VarChar)
- org_id/doc_id/file_hash (VarChar) for simple filtering compatibility
- created_at/updated_at (Int64 unix seconds)
func (*MilvusHandler) CreateNamespace ¶
func (h *MilvusHandler) CreateNamespace(ctx context.Context, name string) error
func (*MilvusHandler) Delete ¶
func (h *MilvusHandler) Delete(ctx context.Context, ids []string, opts *DeleteOptions) error
func (*MilvusHandler) DeleteNamespace ¶
func (h *MilvusHandler) DeleteNamespace(ctx context.Context, name string) error
func (*MilvusHandler) Get ¶
func (h *MilvusHandler) Get(ctx context.Context, ids []string, opts *GetOptions) ([]Record, error)
func (*MilvusHandler) List ¶
func (h *MilvusHandler) List(ctx context.Context, opts *ListOptions) (*ListResult, error)
func (*MilvusHandler) ListNamespaces ¶
func (h *MilvusHandler) ListNamespaces(ctx context.Context) ([]string, error)
func (*MilvusHandler) Provider ¶
func (h *MilvusHandler) Provider() string
func (*MilvusHandler) Query ¶
func (h *MilvusHandler) Query(ctx context.Context, text string, opts *QueryOptions) ([]QueryResult, error)
func (*MilvusHandler) Upsert ¶
func (h *MilvusHandler) Upsert(ctx context.Context, records []Record, opts *UpsertOptions) error
type QdrantConfig ¶
QdrantConfig configuration for Qdrant provider
type QdrantHandler ¶
type QdrantHandler struct {
BaseURL string
APIKey string
HTTPClient *http.Client
Embedder embedder.Embedder
}
func (*QdrantHandler) CreateNamespace ¶
func (qh *QdrantHandler) CreateNamespace(ctx context.Context, name string) error
func (*QdrantHandler) Delete ¶
func (qh *QdrantHandler) Delete(ctx context.Context, ids []string, opts *DeleteOptions) error
func (*QdrantHandler) DeleteNamespace ¶
func (qh *QdrantHandler) DeleteNamespace(ctx context.Context, name string) error
func (*QdrantHandler) Get ¶
func (qh *QdrantHandler) Get(ctx context.Context, ids []string, opts *GetOptions) ([]Record, error)
func (*QdrantHandler) List ¶
func (qh *QdrantHandler) List(ctx context.Context, opts *ListOptions) (*ListResult, error)
func (*QdrantHandler) ListNamespaces ¶
func (qh *QdrantHandler) ListNamespaces(ctx context.Context) ([]string, error)
func (*QdrantHandler) Provider ¶
func (qh *QdrantHandler) Provider() string
func (*QdrantHandler) Query ¶
func (qh *QdrantHandler) Query(ctx context.Context, text string, opts *QueryOptions) ([]QueryResult, error)
func (*QdrantHandler) Upsert ¶
func (qh *QdrantHandler) Upsert(ctx context.Context, records []Record, opts *UpsertOptions) error
type QueryCache ¶
type QueryCache struct {
// contains filtered or unexported fields
}
QueryCache provides caching for query results to improve performance
func NewQueryCache ¶
func NewQueryCache(maxSize int, ttl time.Duration) *QueryCache
NewQueryCache creates a new query cache with specified size and TTL
func (*QueryCache) Get ¶
func (qc *QueryCache) Get(ctx context.Context, query string) ([]QueryResult, bool)
Get retrieves a cached query result
func (*QueryCache) Set ¶
func (qc *QueryCache) Set(ctx context.Context, query string, results []QueryResult)
Set stores a query result in the cache
func (*QueryCache) Stats ¶
func (qc *QueryCache) Stats() map[string]any
Stats returns cache statistics
type QueryOptions ¶
type QueryResult ¶
type RAGFlowConfig ¶
RAGFlowConfig configuration for RAGFlow provider
type RAGFlowHandler ¶
type RAGFlowHandler struct {
BaseURL string
APIKey string
HTTPClient *http.Client
Embedder embedder.Embedder
}
RAGFlowHandler implements KnowledgeHandler using RAGFlow. RAGFlow is an open-source RAG engine that provides document management, chunking, and vector search capabilities.
func (*RAGFlowHandler) CreateNamespace ¶
func (rh *RAGFlowHandler) CreateNamespace(ctx context.Context, name string) error
CreateNamespace creates a new dataset in RAGFlow
func (*RAGFlowHandler) Delete ¶
func (rh *RAGFlowHandler) Delete(ctx context.Context, ids []string, opts *DeleteOptions) error
Delete removes records by IDs
func (*RAGFlowHandler) DeleteNamespace ¶
func (rh *RAGFlowHandler) DeleteNamespace(ctx context.Context, name string) error
DeleteNamespace deletes a dataset from RAGFlow
func (*RAGFlowHandler) Get ¶
func (rh *RAGFlowHandler) Get(ctx context.Context, ids []string, opts *GetOptions) ([]Record, error)
Get retrieves records by IDs
func (*RAGFlowHandler) List ¶
func (rh *RAGFlowHandler) List(ctx context.Context, opts *ListOptions) (*ListResult, error)
List lists all records in a namespace
func (*RAGFlowHandler) ListNamespaces ¶
func (rh *RAGFlowHandler) ListNamespaces(ctx context.Context) ([]string, error)
ListNamespaces lists all datasets in RAGFlow
func (*RAGFlowHandler) Ping ¶
func (rh *RAGFlowHandler) Ping(ctx context.Context) error
Ping checks the health of RAGFlow service
func (*RAGFlowHandler) Provider ¶
func (rh *RAGFlowHandler) Provider() string
func (*RAGFlowHandler) Query ¶
func (rh *RAGFlowHandler) Query(ctx context.Context, text string, opts *QueryOptions) ([]QueryResult, error)
Query searches documents in RAGFlow knowledge base
func (*RAGFlowHandler) Upsert ¶
func (rh *RAGFlowHandler) Upsert(ctx context.Context, records []Record, opts *UpsertOptions) error
Upsert adds or updates records in RAGFlow knowledge base
type Record ¶
type Record struct {
ID string `json:"id"`
Source string `json:"source"` // origin: file/url/api etc.
Title string `json:"title"`
Content string `json:"content"` // original text fragment
Vector []float32 `json:"vector"` // embedding vector
Tags []string `json:"tags"`
Metadata map[string]any `json:"metadata"`
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
}
Record is a knowledge base record.
type RuleBasedDocumentTypeDetector ¶
type RuleBasedDocumentTypeDetector struct{}
RuleBasedDocumentTypeDetector classifies documents by simple heuristics.
Targets:
- Structured (90%): manuals, papers, contracts, reports, markdown
- Table/KV (5%): resumes, forms, questionnaires, excel-to-text, financial docs
- Unstructured noisy (5%): OCR text, novels, garbled webpages, chat logs
func (*RuleBasedDocumentTypeDetector) DetectDocumentType ¶
func (d *RuleBasedDocumentTypeDetector) DetectDocumentType(ctx context.Context, text string) (DocumentType, error)
type SyncStats ¶
type SyncStats struct {
Added int
Updated int
Deleted int
Unchanged int
TotalTime time.Duration
LastSyncAt time.Time
}
SyncStats represents synchronization statistics
type UpsertOptions ¶
type VectorCache ¶
type VectorCache struct {
// contains filtered or unexported fields
}
VectorCache provides caching for embedding vectors
func NewVectorCache ¶
func NewVectorCache(maxSize int) *VectorCache
NewVectorCache creates a new vector cache
func (*VectorCache) Get ¶
func (vc *VectorCache) Get(text string) ([]float32, bool)
Get retrieves a cached vector
func (*VectorCache) Set ¶
func (vc *VectorCache) Set(text string, vector []float32)
Set stores a vector in the cache