Documentation
¶
Index ¶
- Constants
- type Chunk
- type ChunkData
- type ChunkerConfig
- type EmbeddingClient
- type EmbeddingConfig
- type FileMetadata
- type Indexer
- func (idx *Indexer) DeleteDocument(namespace, digest string) error
- func (idx *Indexer) IndexChunks(namespace, digest, fileName, content string) error
- func (idx *Indexer) IndexDocument(namespace, digest, fileName, content string) error
- func (idx *Indexer) PrepareDocument(namespace, digest, fileName, content string) (bool, error)
- type S3Client
- func (c *S3Client) DeleteDocument(ctx context.Context, namespace, digest string) error
- func (c *S3Client) DocumentExists(ctx context.Context, namespace, digest string) (bool, error)
- func (c *S3Client) DownloadDocument(ctx context.Context, namespace, digest string) ([]byte, error)
- func (c *S3Client) UploadDocument(ctx context.Context, namespace, digest string, data []byte) error
- type S3Config
- type TiDBClient
- func (c *TiDBClient) Close() error
- func (c *TiDBClient) CreateNamespace(namespace string, embeddingDim int) error
- func (c *TiDBClient) DeleteFileByName(namespace, fileName string) error
- func (c *TiDBClient) DeleteFileChunks(namespace, fileDigest string) error
- func (c *TiDBClient) DeleteFileMetadata(namespace, fileDigest string) error
- func (c *TiDBClient) DeleteNamespace(namespace string) error
- func (c *TiDBClient) FileExists(namespace, digest string) (bool, error)
- func (c *TiDBClient) GetFileMetadataByName(namespace, fileName string) (*FileMetadata, error)
- func (c *TiDBClient) HasFilesWithPrefix(namespace, prefix string) (bool, error)
- func (c *TiDBClient) InsertChunk(namespace, fileDigest string, chunkIndex int, chunkText string, ...) error
- func (c *TiDBClient) InsertChunksBatch(namespace, fileDigest string, chunks []ChunkData) error
- func (c *TiDBClient) InsertFileMetadata(namespace string, meta FileMetadata) error
- func (c *TiDBClient) ListFiles(namespace string) ([]FileMetadata, error)
- func (c *TiDBClient) ListFilesWithPrefix(namespace, prefix string) ([]FileMetadata, error)
- func (c *TiDBClient) ListNamespaces() ([]string, error)
- func (c *TiDBClient) NamespaceExists(namespace string) (bool, error)
- func (c *TiDBClient) VectorSearch(namespace string, queryEmbedding []float32, limit int) ([]VectorMatch, error)
- type TiDBConfig
- type VectorFSPlugin
- func (v *VectorFSPlugin) GetConfigParams() []plugin.ConfigParameter
- func (v *VectorFSPlugin) GetFileSystem() filesystem.FileSystem
- func (v *VectorFSPlugin) GetReadme() string
- func (v *VectorFSPlugin) Initialize(cfg map[string]interface{}) error
- func (v *VectorFSPlugin) Name() string
- func (v *VectorFSPlugin) Shutdown() error
- func (v *VectorFSPlugin) Validate(cfg map[string]interface{}) error
- type VectorMatch
Constants ¶
const (
PluginName = "vectorfs"
)
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type Chunk ¶
Chunk represents a text chunk
func ChunkDocument ¶
func ChunkDocument(text string, cfg ChunkerConfig) []Chunk
ChunkDocument splits a document into chunks
type ChunkerConfig ¶
type ChunkerConfig struct {
ChunkSize int // Approximate chunk size in tokens
ChunkOverlap int // Overlap between chunks in tokens
}
ChunkerConfig holds chunking configuration
type EmbeddingClient ¶
type EmbeddingClient struct {
// contains filtered or unexported fields
}
EmbeddingClient handles embedding generation
func NewEmbeddingClient ¶
func NewEmbeddingClient(cfg EmbeddingConfig) (*EmbeddingClient, error)
NewEmbeddingClient creates a new embedding client
func (*EmbeddingClient) GenerateBatchEmbeddings ¶
func (e *EmbeddingClient) GenerateBatchEmbeddings(texts []string) ([][]float32, error)
GenerateBatchEmbeddings generates embeddings for multiple texts
func (*EmbeddingClient) GenerateEmbedding ¶
func (e *EmbeddingClient) GenerateEmbedding(text string) ([]float32, error)
GenerateEmbedding generates an embedding for the given text
func (*EmbeddingClient) GetDimension ¶
func (e *EmbeddingClient) GetDimension() int
GetDimension returns the embedding dimension
type EmbeddingConfig ¶
type EmbeddingConfig struct {
Provider string // Provider name (openai)
APIKey string // API key
Model string // Model name
Dimension int // Embedding dimension
}
EmbeddingConfig holds embedding configuration
type FileMetadata ¶
type FileMetadata struct {
FileDigest string
FileName string
S3Key string
FileSize int64
CreatedAt time.Time
UpdatedAt time.Time
}
FileMetadata represents file metadata stored in TiDB
type Indexer ¶
type Indexer struct {
// contains filtered or unexported fields
}
Indexer handles document indexing
func NewIndexer ¶
func NewIndexer( s3Client *S3Client, tidbClient *TiDBClient, embeddingClient *EmbeddingClient, chunkerConfig ChunkerConfig, ) *Indexer
NewIndexer creates a new indexer
func (*Indexer) DeleteDocument ¶
DeleteDocument removes a document from the index
func (*Indexer) IndexChunks ¶
IndexChunks chunks the document, generates embeddings, and stores the chunks in TiDB (async phase). It is called after PrepareDocument to enable vector search on the document.
func (*Indexer) IndexDocument ¶
IndexDocument indexes a document (upload to S3, chunk, generate embeddings, store in TiDB). Deprecated: Use PrepareDocument + IndexChunks for better performance. This method is kept for backward compatibility.
func (*Indexer) PrepareDocument ¶
PrepareDocument uploads the document to S3 and registers its metadata in TiDB (synchronous phase). After this completes, the file is visible via ls/cat. Returns (alreadyExists, error); if alreadyExists is true, no further indexing is needed.
type S3Client ¶
type S3Client struct {
// contains filtered or unexported fields
}
S3Client handles S3 operations for document storage
func NewS3Client ¶
NewS3Client creates a new S3 client
func (*S3Client) DeleteDocument ¶
DeleteDocument deletes a document from S3
func (*S3Client) DocumentExists ¶
DocumentExists checks if a document exists in S3
func (*S3Client) DownloadDocument ¶
DownloadDocument downloads a document from S3
type S3Config ¶
type S3Config struct {
AccessKey string
SecretKey string
Bucket string
KeyPrefix string
Region string
Endpoint string
}
S3Config holds S3 configuration
type TiDBClient ¶
type TiDBClient struct {
// contains filtered or unexported fields
}
TiDBClient handles TiDB operations for vector search
func NewTiDBClient ¶
func NewTiDBClient(cfg TiDBConfig) (*TiDBClient, error)
NewTiDBClient creates a new TiDB client
func (*TiDBClient) CreateNamespace ¶
func (c *TiDBClient) CreateNamespace(namespace string, embeddingDim int) error
CreateNamespace creates tables for a new namespace (fails if the namespace already exists)
func (*TiDBClient) DeleteFileByName ¶
func (c *TiDBClient) DeleteFileByName(namespace, fileName string) error
DeleteFileByName deletes all versions of a file by name (used before writing new content)
func (*TiDBClient) DeleteFileChunks ¶
func (c *TiDBClient) DeleteFileChunks(namespace, fileDigest string) error
DeleteFileChunks deletes all chunks for a file
func (*TiDBClient) DeleteFileMetadata ¶
func (c *TiDBClient) DeleteFileMetadata(namespace, fileDigest string) error
DeleteFileMetadata deletes file metadata
func (*TiDBClient) DeleteNamespace ¶
func (c *TiDBClient) DeleteNamespace(namespace string) error
DeleteNamespace drops all tables for a namespace
func (*TiDBClient) FileExists ¶
func (c *TiDBClient) FileExists(namespace, digest string) (bool, error)
FileExists checks if a file (by digest) is already indexed
func (*TiDBClient) GetFileMetadataByName ¶
func (c *TiDBClient) GetFileMetadataByName(namespace, fileName string) (*FileMetadata, error)
GetFileMetadataByName retrieves file metadata by file name (returns the latest version)
func (*TiDBClient) HasFilesWithPrefix ¶
func (c *TiDBClient) HasFilesWithPrefix(namespace, prefix string) (bool, error)
HasFilesWithPrefix checks if any files exist with the given prefix (for directory detection). This is much faster than loading all files just to check if a directory exists.
func (*TiDBClient) InsertChunk ¶
func (c *TiDBClient) InsertChunk(namespace, fileDigest string, chunkIndex int, chunkText string, embedding []float32) error
InsertChunk inserts a document chunk with embedding
func (*TiDBClient) InsertChunksBatch ¶
func (c *TiDBClient) InsertChunksBatch(namespace, fileDigest string, chunks []ChunkData) error
InsertChunksBatch inserts multiple chunks in a single batch operation. This significantly reduces database round-trips compared to individual inserts.
func (*TiDBClient) InsertFileMetadata ¶
func (c *TiDBClient) InsertFileMetadata(namespace string, meta FileMetadata) error
InsertFileMetadata inserts file metadata
func (*TiDBClient) ListFiles ¶
func (c *TiDBClient) ListFiles(namespace string) ([]FileMetadata, error)
ListFiles lists all files in a namespace
func (*TiDBClient) ListFilesWithPrefix ¶
func (c *TiDBClient) ListFilesWithPrefix(namespace, prefix string) ([]FileMetadata, error)
ListFilesWithPrefix lists files in a namespace with a given prefix (database-level filtering). This is more efficient than ListFiles when only a subset of files is needed.
func (*TiDBClient) ListNamespaces ¶
func (c *TiDBClient) ListNamespaces() ([]string, error)
ListNamespaces lists all namespaces (by finding all tbl_meta_* tables)
func (*TiDBClient) NamespaceExists ¶
func (c *TiDBClient) NamespaceExists(namespace string) (bool, error)
NamespaceExists checks if a namespace exists
func (*TiDBClient) VectorSearch ¶
func (c *TiDBClient) VectorSearch(namespace string, queryEmbedding []float32, limit int) ([]VectorMatch, error)
VectorSearch performs vector similarity search
type TiDBConfig ¶
type TiDBConfig struct {
DSN string // Connection string
}
TiDBConfig holds TiDB configuration
type VectorFSPlugin ¶
type VectorFSPlugin struct {
// contains filtered or unexported fields
}
func NewVectorFSPlugin ¶
func NewVectorFSPlugin() *VectorFSPlugin
NewVectorFSPlugin creates a new VectorFS plugin
func (*VectorFSPlugin) GetConfigParams ¶
func (v *VectorFSPlugin) GetConfigParams() []plugin.ConfigParameter
func (*VectorFSPlugin) GetFileSystem ¶
func (v *VectorFSPlugin) GetFileSystem() filesystem.FileSystem
func (*VectorFSPlugin) GetReadme ¶
func (v *VectorFSPlugin) GetReadme() string
func (*VectorFSPlugin) Initialize ¶
func (v *VectorFSPlugin) Initialize(cfg map[string]interface{}) error
func (*VectorFSPlugin) Name ¶
func (v *VectorFSPlugin) Name() string
func (*VectorFSPlugin) Shutdown ¶
func (v *VectorFSPlugin) Shutdown() error
func (*VectorFSPlugin) Validate ¶
func (v *VectorFSPlugin) Validate(cfg map[string]interface{}) error