Documentation ¶
Index ¶
- Constants
- type BaseDocumentLoader
- type BaseTextSplitter
- func (s *BaseTextSplitter) GetChunkOverlap() int
- func (s *BaseTextSplitter) GetChunkSize() int
- func (s *BaseTextSplitter) MergeSplits(splits []string, separator string) []string
- func (s *BaseTextSplitter) SplitDocuments(docs []*interfaces.Document) ([]*interfaces.Document, error)
- func (s *BaseTextSplitter) SplitText(text string) ([]string, error)
- func (s *BaseTextSplitter) TriggerCallbacks(ctx context.Context, event string, data interface{}) error
- type BaseTextSplitterConfig
- type CharacterTextSplitter
- type CharacterTextSplitterConfig
- type CodeTextSplitter
- type CodeTextSplitterConfig
- type DirectoryLoader
- type DirectoryLoaderConfig
- type DocumentLoader
- type JSONLoader
- type JSONLoaderConfig
- type MarkdownLoader
- type MarkdownLoaderConfig
- type MarkdownTextSplitter
- type MarkdownTextSplitterConfig
- type RecursiveCharacterTextSplitter
- type RecursiveCharacterTextSplitterConfig
- type TextLoader
- type TextLoaderConfig
- type TextSplitter
- type TokenTextSplitter
- type TokenTextSplitterConfig
- type WebLoader
- type WebLoaderConfig
Constants ¶
const (
	LanguageGo         = "go"
	LanguagePython     = "python"
	LanguageJavaScript = "javascript"
	LanguageTypeScript = "typescript"
	LanguageJava       = "java"
	LanguageRust       = "rust"
	LanguageCpp        = "cpp"
	LanguageC          = "c"
)
LanguageXxx 常量:支持的编程语言标识
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type BaseDocumentLoader ¶
type BaseDocumentLoader struct {
// contains filtered or unexported fields
}
BaseDocumentLoader 基础文档加载器
提供默认的 LoadAndSplit 实现
func NewBaseDocumentLoader ¶
func NewBaseDocumentLoader(metadata map[string]interface{}, callbacks *core.CallbackManager) *BaseDocumentLoader
NewBaseDocumentLoader 创建基础加载器
func (*BaseDocumentLoader) GetCallbackManager ¶
func (l *BaseDocumentLoader) GetCallbackManager() *core.CallbackManager
GetCallbackManager 获取回调管理器
func (*BaseDocumentLoader) GetMetadata ¶
func (l *BaseDocumentLoader) GetMetadata() map[string]interface{}
GetMetadata 获取元数据
func (*BaseDocumentLoader) LoadAndSplit ¶
func (l *BaseDocumentLoader) LoadAndSplit(ctx context.Context, loader DocumentLoader, splitter TextSplitter) ([]*interfaces.Document, error)
LoadAndSplit 默认实现:先加载后分割
type BaseTextSplitter ¶
type BaseTextSplitter struct {
// contains filtered or unexported fields
}
BaseTextSplitter 基础文本分割器
提供通用的分割逻辑
func NewBaseTextSplitter ¶
func NewBaseTextSplitter(config BaseTextSplitterConfig) *BaseTextSplitter
NewBaseTextSplitter 创建基础分割器
func (*BaseTextSplitter) GetChunkOverlap ¶
func (s *BaseTextSplitter) GetChunkOverlap() int
GetChunkOverlap 获取块重叠
func (*BaseTextSplitter) GetChunkSize ¶
func (s *BaseTextSplitter) GetChunkSize() int
GetChunkSize 获取块大小
func (*BaseTextSplitter) MergeSplits ¶
func (s *BaseTextSplitter) MergeSplits(splits []string, separator string) []string
MergeSplits 合并文本块
将多个文本块合并,保持在 chunk size 限制内
func (*BaseTextSplitter) SplitDocuments ¶
func (s *BaseTextSplitter) SplitDocuments(docs []*interfaces.Document) ([]*interfaces.Document, error)
SplitDocuments 分割文档
func (*BaseTextSplitter) SplitText ¶
func (s *BaseTextSplitter) SplitText(text string) ([]string, error)
SplitText 需要由嵌入 BaseTextSplitter 的具体分割器自行实现(Go 没有子类,具体类型通过结构体嵌入并定义同名方法来覆盖)
func (*BaseTextSplitter) TriggerCallbacks ¶
func (s *BaseTextSplitter) TriggerCallbacks(ctx context.Context, event string, data interface{}) error
TriggerCallbacks 触发回调
type BaseTextSplitterConfig ¶
type BaseTextSplitterConfig struct {
ChunkSize int
ChunkOverlap int
LengthFunction func(string) int
KeepSeparator bool
CallbackManager *core.CallbackManager
}
BaseTextSplitterConfig 基础分割器配置
type CharacterTextSplitter ¶
type CharacterTextSplitter struct {
*BaseTextSplitter
// contains filtered or unexported fields
}
CharacterTextSplitter 字符分割器
按字符数分割文本,支持自定义分隔符
func NewCharacterTextSplitter ¶
func NewCharacterTextSplitter(config CharacterTextSplitterConfig) *CharacterTextSplitter
NewCharacterTextSplitter 创建字符分割器
type CharacterTextSplitterConfig ¶
type CharacterTextSplitterConfig struct {
Separator string
ChunkSize int
ChunkOverlap int
KeepSeparator bool
CallbackManager *core.CallbackManager
}
CharacterTextSplitterConfig 字符分割器配置
type CodeTextSplitter ¶
type CodeTextSplitter struct {
*BaseTextSplitter
// contains filtered or unexported fields
}
CodeTextSplitter 代码分割器
针对代码进行智能分割,保持代码结构完整性
func NewCodeTextSplitter ¶
func NewCodeTextSplitter(config CodeTextSplitterConfig) *CodeTextSplitter
NewCodeTextSplitter 创建代码分割器
type CodeTextSplitterConfig ¶
type CodeTextSplitterConfig struct {
Language string
ChunkSize int
ChunkOverlap int
CallbackManager *core.CallbackManager
}
CodeTextSplitterConfig 代码分割器配置
type DirectoryLoader ¶
type DirectoryLoader struct {
*BaseDocumentLoader
// contains filtered or unexported fields
}
DirectoryLoader 目录加载器
批量加载目录中的文件
func NewDirectoryLoader ¶
func NewDirectoryLoader(config DirectoryLoaderConfig) *DirectoryLoader
NewDirectoryLoader 创建目录加载器
func (*DirectoryLoader) Load ¶
func (l *DirectoryLoader) Load(ctx context.Context) ([]*interfaces.Document, error)
Load 加载目录中的所有文件
func (*DirectoryLoader) LoadAndSplit ¶
func (l *DirectoryLoader) LoadAndSplit(ctx context.Context, splitter TextSplitter) ([]*interfaces.Document, error)
LoadAndSplit 加载并分割
type DirectoryLoaderConfig ¶
type DirectoryLoaderConfig struct {
DirPath string
Glob string
Recursive bool
Loader func(string) DocumentLoader
Metadata map[string]interface{}
CallbackManager *core.CallbackManager
}
DirectoryLoaderConfig 目录加载器配置
type DocumentLoader ¶
type DocumentLoader interface {
// Load 加载文档
Load(ctx context.Context) ([]*interfaces.Document, error)
// LoadAndSplit 加载并分割文档
LoadAndSplit(ctx context.Context, splitter TextSplitter) ([]*interfaces.Document, error)
// GetMetadata 获取加载器元数据
GetMetadata() map[string]interface{}
}
DocumentLoader 文档加载器接口
负责从各种来源加载文档,支持单独加载或加载后分割
type JSONLoader ¶
type JSONLoader struct {
*BaseDocumentLoader
// contains filtered or unexported fields
}
JSONLoader JSON 文件加载器
加载 JSON 文件,支持 JSON Lines 格式
func NewJSONLoader ¶
func NewJSONLoader(config JSONLoaderConfig) *JSONLoader
NewJSONLoader 创建 JSON 加载器
func (*JSONLoader) Load ¶
func (l *JSONLoader) Load(ctx context.Context) ([]*interfaces.Document, error)
Load 加载 JSON 文件
func (*JSONLoader) LoadAndSplit ¶
func (l *JSONLoader) LoadAndSplit(ctx context.Context, splitter TextSplitter) ([]*interfaces.Document, error)
LoadAndSplit 加载并分割
type JSONLoaderConfig ¶
type JSONLoaderConfig struct {
FilePath string
JSONLines bool // 是否为 JSON Lines 格式
ContentKey string // 内容字段的键
MetadataKeys []string // 要提取为元数据的键
Metadata map[string]interface{}
CallbackManager *core.CallbackManager
}
JSONLoaderConfig JSON 加载器配置
type MarkdownLoader ¶
type MarkdownLoader struct {
*BaseDocumentLoader
// contains filtered or unexported fields
}
MarkdownLoader Markdown 文件加载器
加载 Markdown 文件并提取结构信息
func NewMarkdownLoader ¶
func NewMarkdownLoader(config MarkdownLoaderConfig) *MarkdownLoader
NewMarkdownLoader 创建 Markdown 加载器
func (*MarkdownLoader) Load ¶
func (l *MarkdownLoader) Load(ctx context.Context) ([]*interfaces.Document, error)
Load 加载 Markdown 文件
func (*MarkdownLoader) LoadAndSplit ¶
func (l *MarkdownLoader) LoadAndSplit(ctx context.Context, splitter TextSplitter) ([]*interfaces.Document, error)
LoadAndSplit 加载并分割
type MarkdownLoaderConfig ¶
type MarkdownLoaderConfig struct {
FilePath string
RemoveImages bool
RemoveLinks bool
RemoveCodeFmt bool
Metadata map[string]interface{}
CallbackManager *core.CallbackManager
}
MarkdownLoaderConfig Markdown 加载器配置
type MarkdownTextSplitter ¶
type MarkdownTextSplitter struct {
*BaseTextSplitter
// contains filtered or unexported fields
}
MarkdownTextSplitter Markdown 智能分割器
按 Markdown 结构分割(标题、段落等)
func NewMarkdownTextSplitter ¶
func NewMarkdownTextSplitter(config MarkdownTextSplitterConfig) *MarkdownTextSplitter
NewMarkdownTextSplitter 创建 Markdown 分割器
type MarkdownTextSplitterConfig ¶
type MarkdownTextSplitterConfig struct {
HeadersToSplitOn []string // 要分割的标题级别,如 []string{"#", "##", "###"}
ChunkSize int
ChunkOverlap int
CallbackManager *core.CallbackManager
}
MarkdownTextSplitterConfig Markdown 分割器配置
type RecursiveCharacterTextSplitter ¶
type RecursiveCharacterTextSplitter struct {
*BaseTextSplitter
// contains filtered or unexported fields
}
RecursiveCharacterTextSplitter 递归字符分割器
使用多个分隔符递归分割,优先使用段落、句子等自然边界
func NewRecursiveCharacterTextSplitter ¶
func NewRecursiveCharacterTextSplitter(config RecursiveCharacterTextSplitterConfig) *RecursiveCharacterTextSplitter
NewRecursiveCharacterTextSplitter 创建递归分割器
type RecursiveCharacterTextSplitterConfig ¶
type RecursiveCharacterTextSplitterConfig struct {
Separators []string
ChunkSize int
ChunkOverlap int
KeepSeparator bool
CallbackManager *core.CallbackManager
}
RecursiveCharacterTextSplitterConfig 递归分割器配置
type TextLoader ¶
type TextLoader struct {
*BaseDocumentLoader
// contains filtered or unexported fields
}
TextLoader 文本文件加载器
加载纯文本文件,支持各种编码
func (*TextLoader) Load ¶
func (l *TextLoader) Load(ctx context.Context) ([]*interfaces.Document, error)
Load 加载文本文件
func (*TextLoader) LoadAndSplit ¶
func (l *TextLoader) LoadAndSplit(ctx context.Context, splitter TextSplitter) ([]*interfaces.Document, error)
LoadAndSplit 加载并分割
type TextLoaderConfig ¶
type TextLoaderConfig struct {
FilePath string
Encoding string
Metadata map[string]interface{}
CallbackManager *core.CallbackManager
}
TextLoaderConfig 文本加载器配置
type TextSplitter ¶
type TextSplitter interface {
// SplitText 分割文本
SplitText(text string) ([]string, error)
// SplitDocuments 分割文档
SplitDocuments(docs []*interfaces.Document) ([]*interfaces.Document, error)
// GetChunkSize 获取块大小
GetChunkSize() int
// GetChunkOverlap 获取块重叠大小
GetChunkOverlap() int
}
TextSplitter 文本分割器接口
负责将长文本分割成更小的块,用于嵌入和检索
type TokenTextSplitter ¶
type TokenTextSplitter struct {
*BaseTextSplitter
// contains filtered or unexported fields
}
TokenTextSplitter Token 分割器
按 token 数分割文本(简化实现,使用空格分词)
func NewTokenTextSplitter ¶
func NewTokenTextSplitter(config TokenTextSplitterConfig) *TokenTextSplitter
NewTokenTextSplitter 创建 Token 分割器
type TokenTextSplitterConfig ¶
type TokenTextSplitterConfig struct {
Encoding string // 编码方式(保留用于扩展)
ChunkSize int
ChunkOverlap int
CallbackManager *core.CallbackManager
}
TokenTextSplitterConfig Token 分割器配置
type WebLoader ¶
type WebLoader struct {
*BaseDocumentLoader
// contains filtered or unexported fields
}
WebLoader Web 页面加载器
通过 HTTP 加载 Web 页面内容
func (*WebLoader) LoadAndSplit ¶
func (l *WebLoader) LoadAndSplit(ctx context.Context, splitter TextSplitter) ([]*interfaces.Document, error)
LoadAndSplit 加载并分割