knowledge

package

Version: v1.2.0 (latest) | Published: Jun 6, 2026 | License: MIT | Imports: 28 | Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/LingByte/lingllm

Links

Open Source Insights

Documentation ¶

Index ¶

Constants
Variables
func ComputeHash(record *Record) string
type AliyunConfig
type AliyunHandler
- func (ah *AliyunHandler) CreateNamespace(ctx context.Context, name string) error
- func (ah *AliyunHandler) Delete(ctx context.Context, ids []string, opts *DeleteOptions) error
- func (ah *AliyunHandler) DeleteNamespace(ctx context.Context, name string) error
- func (ah *AliyunHandler) Get(ctx context.Context, ids []string, opts *GetOptions) ([]Record, error)
- func (ah *AliyunHandler) List(ctx context.Context, opts *ListOptions) (*ListResult, error)
- func (ah *AliyunHandler) ListNamespaces(ctx context.Context) ([]string, error)
- func (ah *AliyunHandler) Ping(ctx context.Context) error
- func (ah *AliyunHandler) Provider() string
- func (ah *AliyunHandler) Query(ctx context.Context, text string, opts *QueryOptions) ([]QueryResult, error)
- func (ah *AliyunHandler) Upsert(ctx context.Context, records []Record, opts *UpsertOptions) error
type BackupConfig
type CacheEntry
type Chunk
type ChunkOptions
type Chunker
type DeleteOptions
type DocumentHash
type DocumentType
type DocumentTypeDetector
type ExportData
type ExportFormat
type ExportMetadata
type Filter
type FilterOp
type GetOptions
type HandlerFactoryParams
type IncrementalUpdater
- func NewIncrementalUpdater() *IncrementalUpdater
- func (iu *IncrementalUpdater) Clear()
- func (iu *IncrementalUpdater) GetChanges(records []Record) ([]Record, []string)
- func (iu *IncrementalUpdater) GetHashes() map[string]string
- func (iu *IncrementalUpdater) GetSyncStats(records []Record) SyncStats
- func (iu *IncrementalUpdater) LastSyncTime() time.Time
- func (iu *IncrementalUpdater) UpdateHashes(records []Record)
type KnowledgeBase
- func NewKnowledgeBase(cfg KnowledgeBaseConfig) (*KnowledgeBase, error)
- func (kb *KnowledgeBase) AddDocument(ctx context.Context, docID, title, content string, metadata map[string]any) error
- func (kb *KnowledgeBase) Backup(ctx context.Context, cfg BackupConfig) (string, error)
- func (kb *KnowledgeBase) CacheStats() map[string]any
- func (kb *KnowledgeBase) ClearCache()
- func (kb *KnowledgeBase) Close() error
- func (kb *KnowledgeBase) DeleteDocument(ctx context.Context, docID string) error
- func (kb *KnowledgeBase) ExportToFile(ctx context.Context, filepath string, format ExportFormat) error
- func (kb *KnowledgeBase) ExportToWriter(ctx context.Context, w io.Writer, format ExportFormat) error
- func (kb *KnowledgeBase) Health(ctx context.Context) error
- func (kb *KnowledgeBase) ImportFromFile(ctx context.Context, filepath string, format ExportFormat) error
- func (kb *KnowledgeBase) ImportFromReader(ctx context.Context, r io.Reader, format ExportFormat) error
- func (kb *KnowledgeBase) IncrementalAddDocuments(ctx context.Context, updater *IncrementalUpdater, records []Record) error
- func (kb *KnowledgeBase) Query(ctx context.Context, query string, topK int) ([]QueryResult, error)
- func (kb *KnowledgeBase) Restore(ctx context.Context, filepath string, format ExportFormat) error
type KnowledgeBaseConfig
type KnowledgeHandler
- func NewKnowledgeHandler(p HandlerFactoryParams) (KnowledgeHandler, error)
type ListOptions
type ListResult
type MilvusConfig
type MilvusHandler
- func (h *MilvusHandler) CreateNamespace(ctx context.Context, name string) error
- func (h *MilvusHandler) Delete(ctx context.Context, ids []string, opts *DeleteOptions) error
- func (h *MilvusHandler) DeleteNamespace(ctx context.Context, name string) error
- func (h *MilvusHandler) Get(ctx context.Context, ids []string, opts *GetOptions) ([]Record, error)
- func (h *MilvusHandler) List(ctx context.Context, opts *ListOptions) (*ListResult, error)
- func (h *MilvusHandler) ListNamespaces(ctx context.Context) ([]string, error)
- func (h *MilvusHandler) Ping(ctx context.Context) error
- func (h *MilvusHandler) Provider() string
- func (h *MilvusHandler) Query(ctx context.Context, text string, opts *QueryOptions) ([]QueryResult, error)
- func (h *MilvusHandler) Upsert(ctx context.Context, records []Record, opts *UpsertOptions) error
type QdrantConfig
type QdrantHandler
- func (qh *QdrantHandler) CreateNamespace(ctx context.Context, name string) error
- func (qh *QdrantHandler) Delete(ctx context.Context, ids []string, opts *DeleteOptions) error
- func (qh *QdrantHandler) DeleteNamespace(ctx context.Context, name string) error
- func (qh *QdrantHandler) Get(ctx context.Context, ids []string, opts *GetOptions) ([]Record, error)
- func (qh *QdrantHandler) List(ctx context.Context, opts *ListOptions) (*ListResult, error)
- func (qh *QdrantHandler) ListNamespaces(ctx context.Context) ([]string, error)
- func (qh *QdrantHandler) Ping(ctx context.Context) error
- func (qh *QdrantHandler) Provider() string
- func (qh *QdrantHandler) Query(ctx context.Context, text string, opts *QueryOptions) ([]QueryResult, error)
- func (qh *QdrantHandler) Upsert(ctx context.Context, records []Record, opts *UpsertOptions) error
type QueryCache
- func NewQueryCache(maxSize int, ttl time.Duration) *QueryCache
- func (qc *QueryCache) Clear()
- func (qc *QueryCache) Get(ctx context.Context, query string) ([]QueryResult, bool)
- func (qc *QueryCache) Set(ctx context.Context, query string, results []QueryResult)
- func (qc *QueryCache) Stats() map[string]any
type QueryOptions
type QueryResult
type RAGFlowConfig
type RAGFlowHandler
- func (rh *RAGFlowHandler) CreateNamespace(ctx context.Context, name string) error
- func (rh *RAGFlowHandler) Delete(ctx context.Context, ids []string, opts *DeleteOptions) error
- func (rh *RAGFlowHandler) DeleteNamespace(ctx context.Context, name string) error
- func (rh *RAGFlowHandler) Get(ctx context.Context, ids []string, opts *GetOptions) ([]Record, error)
- func (rh *RAGFlowHandler) List(ctx context.Context, opts *ListOptions) (*ListResult, error)
- func (rh *RAGFlowHandler) ListNamespaces(ctx context.Context) ([]string, error)
- func (rh *RAGFlowHandler) Ping(ctx context.Context) error
- func (rh *RAGFlowHandler) Provider() string
- func (rh *RAGFlowHandler) Query(ctx context.Context, text string, opts *QueryOptions) ([]QueryResult, error)
- func (rh *RAGFlowHandler) Upsert(ctx context.Context, records []Record, opts *UpsertOptions) error
type Record
type RuleBasedDocumentTypeDetector
- func (d *RuleBasedDocumentTypeDetector) DetectDocumentType(ctx context.Context, text string) (DocumentType, error)
type SyncStats
type UpsertOptions
type VectorCache
- func NewVectorCache(maxSize int) *VectorCache
- func (vc *VectorCache) Clear()
- func (vc *VectorCache) Get(text string) ([]float32, bool)
- func (vc *VectorCache) Set(text string, vector []float32)
- func (vc *VectorCache) Size() int

Constants ¶

View Source

// Provider identifiers accepted in HandlerFactoryParams.Provider.
const (
	// ProviderQdrant identifies the Qdrant vector database.
	ProviderQdrant = "qdrant"

	// ProviderMilvus identifies the Milvus vector database.
	ProviderMilvus = "milvus"

	// ProviderRAGFlow identifies the RAGFlow RAG engine.
	ProviderRAGFlow = "ragflow"

	// ProviderAliyun identifies the Alibaba Bailian knowledge base.
	ProviderAliyun = "aliyun"
)

Variables ¶

View Source

// Sentinel errors defined by this package.
//
// NOTE(review): ErrHandlerNotFound and ErrCollectionNotFound carry messages that
// describe a missing required value ("handler not be null", "Collection is required")
// rather than a failed lookup — confirm the names still match how they are used.
var (
	ErrHandlerNotFound        = errors.New("handler not be null")
	ErrBaseURL                = errors.New("BaseURL is required")
	ErrCollectionNotFound     = errors.New("Collection is required")
	ErrRecordNotFound         = errors.New("record not found")
	ErrNamespaceNotFound      = errors.New("namespace not found")
	ErrInvalidVectorDimension = errors.New("invalid vector dimension")
	ErrEmptyQuery             = errors.New("empty query text")
	ErrEmptyText              = errors.New("empty text")
	ErrInvalidChunkOpt        = errors.New("invalid chunk options")
	ErrNoChunks               = errors.New("no chunks generated")
	ErrChunkerNotFound        = errors.New("no suitable chunker for document type")
)

Functions ¶

func ComputeHash ¶

func ComputeHash(record *Record) string

ComputeHash computes the hash of a document

Types ¶

type AliyunConfig ¶

type AliyunConfig struct {
	AccessKeyID     string
	AccessKeySecret string
	Endpoint        string
	WorkspaceID     string
	CategoryID      string
	Timeout         time.Duration
}

AliyunConfig configuration for Alibaba Bailian provider

type AliyunHandler ¶

type AliyunHandler struct {
	WorkspaceID string
	CategoryID  string
	Embedder    embedder.Embedder
	// contains filtered or unexported fields
}

AliyunHandler implements KnowledgeHandler using Alibaba Bailian. Alibaba Bailian is a cloud-based RAG service with document management, indexing, and semantic search capabilities.

Note: Alibaba Bailian does not have a namespace concept. Instead, it uses:

- Workspace: top-level organization unit
- Index: knowledge base unit (equivalent to a namespace in other systems)

Each Index is an independent knowledge base within a Workspace.

func (*AliyunHandler) CreateNamespace ¶

func (ah *AliyunHandler) CreateNamespace(ctx context.Context, name string) error

CreateNamespace creates a new Index (knowledge base) in Alibaba Bailian. Note: in Alibaba Bailian, a namespace maps to an Index, which is the knowledge base unit. Creating an empty index requires using the CreateIndex API.

func (*AliyunHandler) Delete ¶

func (ah *AliyunHandler) Delete(ctx context.Context, ids []string, opts *DeleteOptions) error

Delete removes records by IDs. Note: Alibaba Bailian doesn't support deleting individual documents. This method returns an error as per API limitations.

func (*AliyunHandler) DeleteNamespace ¶

func (ah *AliyunHandler) DeleteNamespace(ctx context.Context, name string) error

DeleteNamespace deletes an Index (knowledge base) from Alibaba Bailian. Note: in Alibaba Bailian, a namespace maps to an Index, which is the knowledge base unit. This operation deletes the entire index and all its documents.

func (*AliyunHandler) Get ¶

func (ah *AliyunHandler) Get(ctx context.Context, ids []string, opts *GetOptions) ([]Record, error)

Get retrieves records by IDs using semantic search. Note: Alibaba Bailian doesn't support direct document retrieval by ID. This implementation searches for documents with matching IDs.

func (*AliyunHandler) List ¶

func (ah *AliyunHandler) List(ctx context.Context, opts *ListOptions) (*ListResult, error)

List lists all records in a namespace using search. Note: Alibaba Bailian doesn't have a native list API. This implementation returns empty results, as pagination is not supported.

func (*AliyunHandler) ListNamespaces ¶

func (ah *AliyunHandler) ListNamespaces(ctx context.Context) ([]string, error)

ListNamespaces lists all Indexes (knowledge bases) in Alibaba Bailian. Note: in Alibaba Bailian, each Index is an independent knowledge base. This implementation returns an empty list, as the API doesn't provide a list operation. Users should manage indexes through the Alibaba Bailian console.

func (*AliyunHandler) Ping ¶

func (ah *AliyunHandler) Ping(ctx context.Context) error

Ping checks the health of Alibaba Bailian service

func (*AliyunHandler) Provider ¶

func (ah *AliyunHandler) Provider() string

func (*AliyunHandler) Query ¶

func (ah *AliyunHandler) Query(ctx context.Context, text string, opts *QueryOptions) ([]QueryResult, error)

Query searches documents in an Alibaba Bailian knowledge base. Note: in Alibaba Bailian, the Namespace parameter maps to the Index ID (knowledge base ID). Supports the EnableReranking and ReturnMetadata options.

func (*AliyunHandler) Upsert ¶

func (ah *AliyunHandler) Upsert(ctx context.Context, records []Record, opts *UpsertOptions) error

Upsert adds or updates records in an Alibaba Bailian knowledge base. Note: in Alibaba Bailian, the Namespace parameter maps to the Index ID (knowledge base ID). Note: this implementation validates records, but document upload requires using the Data Center API. For now, this is a validation-only implementation.

type BackupConfig ¶

type BackupConfig struct {
	FilePath  string
	Format    ExportFormat
	Timestamp bool // Add timestamp to filename
}

BackupConfig represents backup configuration

type CacheEntry ¶

type CacheEntry struct {
	Results   []QueryResult
	Timestamp time.Time
}

CacheEntry represents a cached query result

type Chunk ¶

type Chunk struct {
	Index    int
	Title    string
	Text     string
	Metadata map[string]any
}

Chunk is one retrieval-oriented segment produced by a Chunker.

type ChunkOptions ¶

// ChunkOptions controls chunk size, overlap and optional title metadata.
type ChunkOptions struct {
	// MaxChars is the target maximum characters per chunk. When 0, chunkers use their own defaults.
	MaxChars int
	// OverlapChars is the overlap size between consecutive chunks.
	// If set to -1, chunkers may disable overlap.
	OverlapChars int
	// MinChars is a lower bound; very small chunks may be dropped/merged.
	MinChars int

	// DocumentTitle carries the optional title metadata.
	// NOTE(review): how individual chunkers use it is not shown here — confirm.
	DocumentTitle string

	// PreChunkClean is passed to base.CleanText before an LLM call.
	// If nil, some implementations will enable StripMarkdown and DedupLines by default.
	PreChunkClean *utils.Options
}

ChunkOptions controls chunk size, overlap and optional title metadata.

type Chunker ¶

type Chunker interface {
	Provider() string
	Chunk(ctx context.Context, text string, opts *ChunkOptions) ([]Chunk, error)
}

Chunker splits long text into chunks (implementations may use deterministic rules or an LLM).

type DeleteOptions ¶

type DeleteOptions struct {
	Namespace string
}

type DocumentHash ¶

type DocumentHash struct {
	ID        string
	Hash      string
	UpdatedAt time.Time
}

DocumentHash represents a document's hash for change detection

type DocumentType ¶

type DocumentType int

// Document types reported by a DocumentTypeDetector.
const (
	DocumentTypeUnknown      DocumentType = iota
	DocumentTypeStructured                // has headings, sections and paragraphs (manuals, papers, markdown)
	DocumentTypeTableKV                   // tables, key-value pairs, forms, resumes
	DocumentTypeUnstructured              // messy text, OCR output, no punctuation, no paragraphs (LLM required)
)

type DocumentTypeDetector ¶

type DocumentTypeDetector interface {
	DetectDocumentType(ctx context.Context, text string) (DocumentType, error)
}

DocumentTypeDetector decides which chunking strategy should be used for a document.

type ExportData ¶

type ExportData struct {
	Metadata ExportMetadata `json:"metadata"`
	Records  []Record       `json:"records"`
}

ExportData represents the complete export data

type ExportFormat ¶

type ExportFormat string

ExportFormat defines the export format

const (
	ExportFormatJSON  ExportFormat = "json"
	ExportFormatJSONL ExportFormat = "jsonl"
)

type ExportMetadata ¶

type ExportMetadata struct {
	ExportedAt   time.Time `json:"exported_at"`
	TotalRecords int       `json:"total_records"`
	Handler      string    `json:"handler"`
	Namespace    string    `json:"namespace"`
	Version      string    `json:"version"`
}

ExportMetadata contains metadata about the export

type Filter ¶

type Filter struct {
	Field    string   `json:"field"`
	Operator FilterOp `json:"operator"`
	Value    []any    `json:"value"`
}

type FilterOp ¶

type FilterOp string

const (
	FilterOpEqual       FilterOp = "$eq"
	FilterOpNotEqual    FilterOp = "$ne"
	FilterOpIn          FilterOp = "$in"
	FilterOpNotIn       FilterOp = "$nin"
	FilterOpGt          FilterOp = "$gt"
	FilterOpGte         FilterOp = "$gte"
	FilterOpLt          FilterOp = "$lt"
	FilterOpLte         FilterOp = "$lte"
	FilterOpContainsAll FilterOp = "$all"
	FilterOpContainsAny FilterOp = "$any"
)

type GetOptions ¶

type GetOptions struct {
	Namespace string
}

type HandlerFactoryParams ¶

type HandlerFactoryParams struct {
	// Provider is ProviderQdrant, ProviderMilvus, ProviderRAGFlow, or ProviderAliyun (see constants in this package).
	Provider string
	// Namespace is the Qdrant / Milvus collection name, RAGFlow dataset name, or Alibaba Bailian index name.
	Namespace string
	// QdrantConfig is required when Provider is ProviderQdrant
	QdrantConfig *QdrantConfig
	// MilvusConfig is required when Provider is ProviderMilvus
	MilvusConfig *MilvusConfig
	// RAGFlowConfig is required when Provider is ProviderRAGFlow
	RAGFlowConfig *RAGFlowConfig
	// AliyunConfig is required when Provider is ProviderAliyun
	AliyunConfig *AliyunConfig
}

HandlerFactoryParams selects and configures a KnowledgeHandler.

type IncrementalUpdater ¶

type IncrementalUpdater struct {
	// contains filtered or unexported fields
}

IncrementalUpdater manages incremental document updates

func NewIncrementalUpdater ¶

func NewIncrementalUpdater() *IncrementalUpdater

NewIncrementalUpdater creates a new incremental updater

func (*IncrementalUpdater) Clear ¶

func (iu *IncrementalUpdater) Clear()

Clear clears all stored hashes

func (*IncrementalUpdater) GetChanges ¶

func (iu *IncrementalUpdater) GetChanges(records []Record) ([]Record, []string)

GetChanges detects which documents have changed

func (*IncrementalUpdater) GetHashes ¶

func (iu *IncrementalUpdater) GetHashes() map[string]string

GetHashes returns all stored hashes

func (*IncrementalUpdater) GetSyncStats ¶

func (iu *IncrementalUpdater) GetSyncStats(records []Record) SyncStats

GetSyncStats returns synchronization statistics

func (*IncrementalUpdater) LastSyncTime ¶

func (iu *IncrementalUpdater) LastSyncTime() time.Time

LastSyncTime returns the last sync time

func (*IncrementalUpdater) UpdateHashes ¶

func (iu *IncrementalUpdater) UpdateHashes(records []Record)

UpdateHashes updates the stored hashes

type KnowledgeBase ¶

type KnowledgeBase struct {
	// contains filtered or unexported fields
}

KnowledgeBase integrates embedder, search, retrieve, and vector database to provide a complete knowledge management solution.

func NewKnowledgeBase ¶

func NewKnowledgeBase(cfg KnowledgeBaseConfig) (*KnowledgeBase, error)

NewKnowledgeBase creates a new knowledge base instance

func (*KnowledgeBase) AddDocument ¶

func (kb *KnowledgeBase) AddDocument(ctx context.Context, docID, title, content string, metadata map[string]any) error

AddDocument adds a document to the knowledge base. It chunks the document, generates embeddings, and stores the result in both the vector and search engines.

func (*KnowledgeBase) Backup ¶

func (kb *KnowledgeBase) Backup(ctx context.Context, cfg BackupConfig) (string, error)

Backup creates a backup of the knowledge base

func (*KnowledgeBase) CacheStats ¶

func (kb *KnowledgeBase) CacheStats() map[string]any

CacheStats returns cache statistics

func (*KnowledgeBase) ClearCache ¶

func (kb *KnowledgeBase) ClearCache()

ClearCache clears all cached data

func (*KnowledgeBase) Close ¶

func (kb *KnowledgeBase) Close() error

Close closes all resources

func (*KnowledgeBase) DeleteDocument ¶

func (kb *KnowledgeBase) DeleteDocument(ctx context.Context, docID string) error

DeleteDocument removes a document from the knowledge base

func (*KnowledgeBase) ExportToFile ¶

func (kb *KnowledgeBase) ExportToFile(ctx context.Context, filepath string, format ExportFormat) error

ExportToFile exports knowledge base to a file

func (*KnowledgeBase) ExportToWriter ¶

func (kb *KnowledgeBase) ExportToWriter(ctx context.Context, w io.Writer, format ExportFormat) error

ExportToWriter exports knowledge base to a writer

func (*KnowledgeBase) Health ¶

func (kb *KnowledgeBase) Health(ctx context.Context) error

Health checks the health of all components

func (*KnowledgeBase) ImportFromFile ¶

func (kb *KnowledgeBase) ImportFromFile(ctx context.Context, filepath string, format ExportFormat) error

ImportFromFile imports knowledge base from a file

func (*KnowledgeBase) ImportFromReader ¶

func (kb *KnowledgeBase) ImportFromReader(ctx context.Context, r io.Reader, format ExportFormat) error

ImportFromReader imports knowledge base from a reader

func (*KnowledgeBase) IncrementalAddDocuments ¶

func (kb *KnowledgeBase) IncrementalAddDocuments(ctx context.Context, updater *IncrementalUpdater, records []Record) error

IncrementalAddDocuments adds documents with change detection

func (*KnowledgeBase) Query ¶

func (kb *KnowledgeBase) Query(ctx context.Context, query string, topK int) ([]QueryResult, error)

Query searches the knowledge base using hybrid retrieval

func (*KnowledgeBase) Restore ¶

func (kb *KnowledgeBase) Restore(ctx context.Context, filepath string, format ExportFormat) error

Restore restores the knowledge base from a backup

type KnowledgeBaseConfig ¶

// KnowledgeBaseConfig is the configuration passed to NewKnowledgeBase.
type KnowledgeBaseConfig struct {
	// Handler is the vector database handler.
	Handler KnowledgeHandler

	// Embedder is the text embedder used for semantic search.
	Embedder embedder.Embedder

	// Searcher is the full-text search engine.
	Searcher search.Engine

	// Retriever is the multi-strategy retriever.
	Retriever retrieve.StrategyRetriever

	// Detector is the document type detector.
	Detector DocumentTypeDetector

	// Chunkers maps each document type to the chunker used for it.
	Chunkers map[DocumentType]Chunker

	// Namespace is the default namespace for queries (optional).
	Namespace string

	// EnableCache enables query result caching (optional, default: true).
	// NOTE(review): the zero value of a bool is false, so a "true" default can only
	// hold if the constructor overrides it — confirm against NewKnowledgeBase.
	EnableCache bool

	// QueryCacheSize is the query cache size (optional, default: 1000).
	QueryCacheSize int

	// VectorCacheSize is the vector cache size (optional, default: 10000).
	VectorCacheSize int
}

KnowledgeBaseConfig configuration for KnowledgeBase

type KnowledgeHandler ¶

// KnowledgeHandler is the abstract interface for a knowledge backend.
type KnowledgeHandler interface {
	// Provider returns the provider name of the backend.
	// NOTE(review): presumably one of the Provider* constants — confirm per implementation.
	Provider() string

	// Upsert writes new records or updates existing ones.
	Upsert(ctx context.Context, records []Record, opts *UpsertOptions) error

	// Query searches for records matching the given text.
	Query(ctx context.Context, text string, opts *QueryOptions) ([]QueryResult, error)

	// Get fetches records by ID.
	Get(ctx context.Context, ids []string, opts *GetOptions) ([]Record, error)

	// List returns records page by page.
	List(ctx context.Context, opts *ListOptions) (*ListResult, error)

	// Delete removes the records with the given IDs.
	Delete(ctx context.Context, ids []string, opts *DeleteOptions) error

	// Ping performs a health check.
	Ping(ctx context.Context) error

	// CreateNamespace creates a new namespace.
	CreateNamespace(ctx context.Context, name string) error

	// DeleteNamespace deletes a namespace.
	DeleteNamespace(ctx context.Context, name string) error

	// ListNamespaces lists the namespaces of the backend.
	ListNamespaces(ctx context.Context) ([]string, error)
}

KnowledgeHandler is the abstract interface that every knowledge backend implements.

func NewKnowledgeHandler ¶

func NewKnowledgeHandler(p HandlerFactoryParams) (KnowledgeHandler, error)

NewKnowledgeHandler returns a backend implementation for the given namespace configuration.

type ListOptions ¶

// ListOptions configures a List call.
//
// NOTE(review): Offset is a string, like ListResult.NextOffset — presumably an
// opaque cursor taken from the previous page; confirm per backend.
type ListOptions struct {
	Namespace string
	Limit     int
	Offset    string
	Filters   []Filter
	OrderBy   string // sort field: "created_at" or "updated_at"
	OrderDir  string // sort direction: "asc" or "desc"
}

type ListResult ¶

type ListResult struct {
	Records    []Record `json:"records"`
	NextOffset string   `json:"next_offset,omitempty"`
}

type MilvusConfig ¶

type MilvusConfig struct {
	Address  string
	Username string
	Password string
	Token    string
	DBName   string
}

MilvusConfig configuration for Milvus provider

type MilvusHandler ¶

type MilvusHandler struct {
	Address  string
	Username string
	Password string
	Token    string
	DBName   string

	Embedder embedder.Embedder
	// contains filtered or unexported fields
}

MilvusHandler implements KnowledgeHandler using Milvus.

- id (VarChar primary key)
- vector (FloatVector)
- content/title/source/tags/metadata_json (VarChar)
- org_id/doc_id/file_hash (VarChar) for simple filtering compatibility
- created_at/updated_at (Int64 unix seconds)

func (*MilvusHandler) CreateNamespace ¶

func (h *MilvusHandler) CreateNamespace(ctx context.Context, name string) error

func (*MilvusHandler) Delete ¶

func (h *MilvusHandler) Delete(ctx context.Context, ids []string, opts *DeleteOptions) error

func (*MilvusHandler) DeleteNamespace ¶

func (h *MilvusHandler) DeleteNamespace(ctx context.Context, name string) error

func (*MilvusHandler) Get ¶

func (h *MilvusHandler) Get(ctx context.Context, ids []string, opts *GetOptions) ([]Record, error)

func (*MilvusHandler) List ¶

func (h *MilvusHandler) List(ctx context.Context, opts *ListOptions) (*ListResult, error)

func (*MilvusHandler) ListNamespaces ¶

func (h *MilvusHandler) ListNamespaces(ctx context.Context) ([]string, error)

func (*MilvusHandler) Ping ¶

func (h *MilvusHandler) Ping(ctx context.Context) error

func (*MilvusHandler) Provider ¶

func (h *MilvusHandler) Provider() string

func (*MilvusHandler) Query ¶

func (h *MilvusHandler) Query(ctx context.Context, text string, opts *QueryOptions) ([]QueryResult, error)

func (*MilvusHandler) Upsert ¶

func (h *MilvusHandler) Upsert(ctx context.Context, records []Record, opts *UpsertOptions) error

type QdrantConfig ¶

type QdrantConfig struct {
	BaseURL string
	APIKey  string
	Timeout time.Duration
}

QdrantConfig configuration for Qdrant provider

type QdrantHandler ¶

type QdrantHandler struct {
	BaseURL    string
	APIKey     string
	HTTPClient *http.Client
	Embedder   embedder.Embedder
}

func (*QdrantHandler) CreateNamespace ¶

func (qh *QdrantHandler) CreateNamespace(ctx context.Context, name string) error

func (*QdrantHandler) Delete ¶

func (qh *QdrantHandler) Delete(ctx context.Context, ids []string, opts *DeleteOptions) error

func (*QdrantHandler) DeleteNamespace ¶

func (qh *QdrantHandler) DeleteNamespace(ctx context.Context, name string) error

func (*QdrantHandler) Get ¶

func (qh *QdrantHandler) Get(ctx context.Context, ids []string, opts *GetOptions) ([]Record, error)

func (*QdrantHandler) List ¶

func (qh *QdrantHandler) List(ctx context.Context, opts *ListOptions) (*ListResult, error)

func (*QdrantHandler) ListNamespaces ¶

func (qh *QdrantHandler) ListNamespaces(ctx context.Context) ([]string, error)

func (*QdrantHandler) Ping ¶

func (qh *QdrantHandler) Ping(ctx context.Context) error

func (*QdrantHandler) Provider ¶

func (qh *QdrantHandler) Provider() string

func (*QdrantHandler) Query ¶

func (qh *QdrantHandler) Query(ctx context.Context, text string, opts *QueryOptions) ([]QueryResult, error)

func (*QdrantHandler) Upsert ¶

func (qh *QdrantHandler) Upsert(ctx context.Context, records []Record, opts *UpsertOptions) error

type QueryCache ¶

type QueryCache struct {
	// contains filtered or unexported fields
}

QueryCache provides caching for query results to improve performance

func NewQueryCache ¶

func NewQueryCache(maxSize int, ttl time.Duration) *QueryCache

NewQueryCache creates a new query cache with specified size and TTL

func (*QueryCache) Clear ¶

func (qc *QueryCache) Clear()

Clear clears all cached entries

func (*QueryCache) Get ¶

func (qc *QueryCache) Get(ctx context.Context, query string) ([]QueryResult, bool)

Get retrieves a cached query result

func (*QueryCache) Set ¶

func (qc *QueryCache) Set(ctx context.Context, query string, results []QueryResult)

Set stores a query result in the cache

func (*QueryCache) Stats ¶

func (qc *QueryCache) Stats() map[string]any

Stats returns cache statistics

type QueryOptions ¶

// QueryOptions configures a Query call.
type QueryOptions struct {
	Namespace       string
	TopK            int
	MinScore        float64  // score threshold
	Filters         []Filter // complex filters
	Model           string   // embedding model
	EnableReranking bool     // whether to enable reranking (supported by Aliyun only)
	ReturnMetadata  bool     // whether to return the full metadata
}

type QueryResult ¶

type QueryResult struct {
	Record Record  `json:"record"`
	Score  float64 `json:"score"`
}

type RAGFlowConfig ¶

type RAGFlowConfig struct {
	BaseURL string
	APIKey  string
	Timeout time.Duration
}

RAGFlowConfig configuration for RAGFlow provider

type RAGFlowHandler ¶

type RAGFlowHandler struct {
	BaseURL    string
	APIKey     string
	HTTPClient *http.Client
	Embedder   embedder.Embedder
}

RAGFlowHandler implements KnowledgeHandler using RAGFlow. RAGFlow is an open-source RAG engine that provides document management, chunking, and vector search capabilities.

func (*RAGFlowHandler) CreateNamespace ¶

func (rh *RAGFlowHandler) CreateNamespace(ctx context.Context, name string) error

CreateNamespace creates a new dataset in RAGFlow

func (*RAGFlowHandler) Delete ¶

func (rh *RAGFlowHandler) Delete(ctx context.Context, ids []string, opts *DeleteOptions) error

Delete removes records by IDs

func (*RAGFlowHandler) DeleteNamespace ¶

func (rh *RAGFlowHandler) DeleteNamespace(ctx context.Context, name string) error

DeleteNamespace deletes a dataset from RAGFlow

func (*RAGFlowHandler) Get ¶

func (rh *RAGFlowHandler) Get(ctx context.Context, ids []string, opts *GetOptions) ([]Record, error)

Get retrieves records by IDs

func (*RAGFlowHandler) List ¶

func (rh *RAGFlowHandler) List(ctx context.Context, opts *ListOptions) (*ListResult, error)

List lists all records in a namespace

func (*RAGFlowHandler) ListNamespaces ¶

func (rh *RAGFlowHandler) ListNamespaces(ctx context.Context) ([]string, error)

ListNamespaces lists all datasets in RAGFlow

func (*RAGFlowHandler) Ping ¶

func (rh *RAGFlowHandler) Ping(ctx context.Context) error

Ping checks the health of RAGFlow service

func (*RAGFlowHandler) Provider ¶

func (rh *RAGFlowHandler) Provider() string

func (*RAGFlowHandler) Query ¶

func (rh *RAGFlowHandler) Query(ctx context.Context, text string, opts *QueryOptions) ([]QueryResult, error)

Query searches documents in RAGFlow knowledge base

func (*RAGFlowHandler) Upsert ¶

func (rh *RAGFlowHandler) Upsert(ctx context.Context, records []Record, opts *UpsertOptions) error

Upsert adds or updates records in RAGFlow knowledge base

type Record ¶

// Record is a knowledge base record.
type Record struct {
	ID        string         `json:"id"`
	Source    string         `json:"source"` // origin of the record: file/url/api etc.
	Title     string         `json:"title"`
	Content   string         `json:"content"` // original text fragment
	Vector    []float32      `json:"vector"`  // embedding vector
	Tags      []string       `json:"tags"`
	Metadata  map[string]any `json:"metadata"`
	CreatedAt time.Time      `json:"created_at"`
	UpdatedAt time.Time      `json:"updated_at"`
}

Record is a knowledge base record.

type RuleBasedDocumentTypeDetector ¶

type RuleBasedDocumentTypeDetector struct{}

RuleBasedDocumentTypeDetector classifies documents by simple heuristics.

Targets:

- Structured (90%): manuals, papers, contracts, reports, markdown
- Table/KV (5%): resumes, forms, questionnaires, excel-to-text, financial docs
- Unstructured noisy (5%): OCR text, novels, garbled webpages, chat logs

func (*RuleBasedDocumentTypeDetector) DetectDocumentType ¶

func (d *RuleBasedDocumentTypeDetector) DetectDocumentType(ctx context.Context, text string) (DocumentType, error)

type SyncStats ¶

type SyncStats struct {
	Added      int
	Updated    int
	Deleted    int
	Unchanged  int
	TotalTime  time.Duration
	LastSyncAt time.Time
}

SyncStats represents synchronization statistics

type UpsertOptions ¶

type UpsertOptions struct {
	Namespace string
	Overwrite bool
	BatchSize int
}

type VectorCache ¶

type VectorCache struct {
	// contains filtered or unexported fields
}

VectorCache provides caching for embedding vectors

func NewVectorCache ¶

func NewVectorCache(maxSize int) *VectorCache

NewVectorCache creates a new vector cache

func (*VectorCache) Clear ¶

func (vc *VectorCache) Clear()

Clear clears all cached vectors

func (*VectorCache) Get ¶

func (vc *VectorCache) Get(text string) ([]float32, bool)

Get retrieves a cached vector

func (*VectorCache) Set ¶

func (vc *VectorCache) Set(text string, vector []float32)

Set stores a vector in the cache

func (*VectorCache) Size ¶

func (vc *VectorCache) Size() int

Size returns the current cache size

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL