Documentation
¶
Index ¶
- func NewCharacterTextSplitter(opts ...CharacterTextSplitterOption) rag.TextSplitter
- func NewRecursiveCharacterTextSplitter(opts ...RecursiveCharacterTextSplitterOption) rag.TextSplitter
- func NewSimpleTextSplitter(chunkSize, chunkOverlap int) rag.TextSplitter
- func NewTokenTextSplitter(chunkSize, chunkOverlap int, tokenizer Tokenizer) rag.TextSplitter
- type CharacterTextSplitter
- type CharacterTextSplitterOption
- type DefaultTokenizer
- type RecursiveCharacterTextSplitter
- type RecursiveCharacterTextSplitterOption
- type SimpleTextSplitter
- type TokenTextSplitter
- type Tokenizer
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func NewCharacterTextSplitter ¶
func NewCharacterTextSplitter(opts ...CharacterTextSplitterOption) rag.TextSplitter
NewCharacterTextSplitter creates a new CharacterTextSplitter
func NewRecursiveCharacterTextSplitter ¶
func NewRecursiveCharacterTextSplitter(opts ...RecursiveCharacterTextSplitterOption) rag.TextSplitter
NewRecursiveCharacterTextSplitter creates a new RecursiveCharacterTextSplitter
func NewSimpleTextSplitter ¶
func NewSimpleTextSplitter(chunkSize, chunkOverlap int) rag.TextSplitter
NewSimpleTextSplitter creates a new SimpleTextSplitter
func NewTokenTextSplitter ¶
func NewTokenTextSplitter(chunkSize, chunkOverlap int, tokenizer Tokenizer) rag.TextSplitter
NewTokenTextSplitter creates a new TokenTextSplitter
Types ¶
type CharacterTextSplitter ¶
type CharacterTextSplitter struct {
// contains filtered or unexported fields
}
CharacterTextSplitter splits text by character count
func (*CharacterTextSplitter) JoinText ¶
func (s *CharacterTextSplitter) JoinText(chunks []string) string
JoinText joins text chunks back together
func (*CharacterTextSplitter) SplitDocuments ¶
func (s *CharacterTextSplitter) SplitDocuments(docs []rag.Document) []rag.Document
SplitDocuments splits documents into chunks
func (*CharacterTextSplitter) SplitText ¶
func (s *CharacterTextSplitter) SplitText(text string) []string
SplitText splits text into chunks by separator or character
type CharacterTextSplitterOption ¶
type CharacterTextSplitterOption func(*CharacterTextSplitter)
CharacterTextSplitterOption configures the CharacterTextSplitter
func WithCharacterChunkOverlap ¶ added in v0.7.1
func WithCharacterChunkOverlap(overlap int) CharacterTextSplitterOption
WithCharacterChunkOverlap sets the chunk overlap for the character splitter
func WithCharacterChunkSize ¶ added in v0.7.1
func WithCharacterChunkSize(size int) CharacterTextSplitterOption
WithCharacterChunkSize sets the chunk size for the character splitter
func WithCharacterSeparator ¶
func WithCharacterSeparator(separator string) CharacterTextSplitterOption
WithCharacterSeparator sets the separator for the character splitter
type DefaultTokenizer ¶
type DefaultTokenizer struct{}
DefaultTokenizer is a simple word-based tokenizer
func (*DefaultTokenizer) Decode ¶
func (t *DefaultTokenizer) Decode(tokens []string) string
Decode converts word tokens back into text
func (*DefaultTokenizer) Encode ¶
func (t *DefaultTokenizer) Encode(text string) []string
Encode tokenizes text into words
type RecursiveCharacterTextSplitter ¶
type RecursiveCharacterTextSplitter struct {
// contains filtered or unexported fields
}
RecursiveCharacterTextSplitter recursively splits text while keeping related pieces together
func (*RecursiveCharacterTextSplitter) JoinText ¶
func (s *RecursiveCharacterTextSplitter) JoinText(chunks []string) string
JoinText joins text chunks back together
func (*RecursiveCharacterTextSplitter) SplitDocuments ¶
func (s *RecursiveCharacterTextSplitter) SplitDocuments(docs []rag.Document) []rag.Document
SplitDocuments splits documents into chunks
func (*RecursiveCharacterTextSplitter) SplitText ¶
func (s *RecursiveCharacterTextSplitter) SplitText(text string) []string
SplitText splits text into chunks
type RecursiveCharacterTextSplitterOption ¶
type RecursiveCharacterTextSplitterOption func(*RecursiveCharacterTextSplitter)
RecursiveCharacterTextSplitterOption configures the RecursiveCharacterTextSplitter
func WithChunkOverlap ¶
func WithChunkOverlap(overlap int) RecursiveCharacterTextSplitterOption
WithChunkOverlap sets the chunk overlap for the splitter
func WithChunkSize ¶
func WithChunkSize(size int) RecursiveCharacterTextSplitterOption
WithChunkSize sets the chunk size for the splitter
func WithLengthFunction ¶
func WithLengthFunction(fn func(string) int) RecursiveCharacterTextSplitterOption
WithLengthFunction sets a custom length function
func WithSeparators ¶
func WithSeparators(separators []string) RecursiveCharacterTextSplitterOption
WithSeparators sets custom separators for the splitter
type SimpleTextSplitter ¶
SimpleTextSplitter splits text into chunks of a given size
func (*SimpleTextSplitter) JoinText ¶
func (s *SimpleTextSplitter) JoinText(chunks []string) string
JoinText joins text chunks back together
func (*SimpleTextSplitter) SplitDocuments ¶
func (s *SimpleTextSplitter) SplitDocuments(documents []rag.Document) []rag.Document
SplitDocuments splits documents into smaller chunks
func (*SimpleTextSplitter) SplitText ¶
func (s *SimpleTextSplitter) SplitText(text string) []string
SplitText splits text into chunks
type TokenTextSplitter ¶
type TokenTextSplitter struct {
// contains filtered or unexported fields
}
TokenTextSplitter splits text by token count
func (*TokenTextSplitter) JoinText ¶
func (s *TokenTextSplitter) JoinText(chunks []string) string
JoinText joins text chunks back together
func (*TokenTextSplitter) SplitDocuments ¶
func (s *TokenTextSplitter) SplitDocuments(docs []rag.Document) []rag.Document
SplitDocuments splits documents into chunks
func (*TokenTextSplitter) SplitText ¶
func (s *TokenTextSplitter) SplitText(text string) []string
SplitText splits text into chunks by token count