Documentation
¶
Index ¶
Constants ¶
This section is empty.
Variables ¶
View Source
// MuPDFLock is a package-level mutex.
// NOTE(review): presumably it serializes calls into the MuPDF library across
// loaders; the usage is not visible here, so confirm against the implementation.
var MuPDFLock sync.Mutex
Functions ¶
func WithConfig ¶
// WithConfig takes a PDFOptions value and returns a function that operates on
// a *PDFOptions; per the package documentation it sets the PDF loader
// configuration.
func WithConfig(config PDFOptions) func(o *PDFOptions)
WithConfig sets the PDF loader configuration.
func WithDisablePageMerge ¶
// WithDisablePageMerge returns a function that operates on a *PDFOptions.
// NOTE(review): judging by the name it presumably sets EnablePageMerge to
// false; the body is not visible here, so confirm.
func WithDisablePageMerge() func(o *PDFOptions)
Types ¶
type PDF ¶
// PDF is a PDF document loader; it implements the DocumentLoader interface.
type PDF struct {
// Opts holds the loader configuration.
Opts PDFOptions
// Document is the underlying fitz document handle.
Document *fitz.Document
// Converter is an mdconv converter.
// NOTE(review): presumably used to convert page content to Markdown; confirm.
Converter *mdconv.Converter
// Lock is a pointer to a mutex.
// NOTE(review): possibly points at MuPDFLock; confirm what it guards.
Lock *sync.Mutex
// Tokenizer is a tiktoken tokenizer.
// NOTE(review): presumably used to count tokens for page merging (see
// PDFOptions.TokenEncoding and PDFOptions.TokenModel); confirm.
Tokenizer *tiktoken.Tiktoken
}
PDF represents a PDF Document loader that implements the DocumentLoader interface.
type PDFOptions ¶
// PDFOptions holds the configuration for the PDF loader.
type PDFOptions struct {
// Password for encrypted PDF files.
Password string
// StartPage is the page number to start loading from (default is 1).
StartPage uint
// NumThread is the number of goroutines used to load PDF documents.
NumThread int
// EnablePageMerge toggles page merging.
// NOTE(review): the default value and the exact merge behavior are not
// visible here; confirm against the loader implementation.
EnablePageMerge bool
// ChunkSize is the maximum number of tokens allowed in a single document.
ChunkSize int
// ChunkOverlap is undocumented in the original.
// NOTE(review): presumably the number of tokens shared between consecutive
// chunks; confirm.
ChunkOverlap int
// TokenEncoding is the encoding the Tokenizer uses for page merging.
TokenEncoding string
// TokenModel is the target model the Tokenizer uses for page merging.
TokenModel string
}
Click to show internal directories.
Click to hide internal directories.