Documentation
¶
Index ¶
- Constants
- Variables
- type BMPParser
- type CompositeParser
- func (p *CompositeParser) AddParsers(parsers ...Parser)
- func (p *CompositeParser) Parse(ctx context.Context, file io.Reader, path string) Result
- func (p *CompositeParser) ParseStream(ctx context.Context, file io.Reader, path string) StreamResultIterator
- func (p *CompositeParser) SupportedMimeTypes() []string
- type CompositeParserResult
- type CompositeParserStreamResult
- func (r *CompositeParserStreamResult) Error() error
- func (r *CompositeParserStreamResult) Path() string
- func (r *CompositeParserStreamResult) Progress() uint8
- func (r *CompositeParserStreamResult) Stage() ParseProgressStage
- func (r *CompositeParserStreamResult) String() string
- func (r *CompositeParserStreamResult) SubResult() StreamResult
- type CompositeStreamResultIterator
- type EMLParser
- type EMLParserResult
- type EMLParserStreamResult
- func (r *EMLParserStreamResult) Error() error
- func (r *EMLParserStreamResult) Path() string
- func (r *EMLParserStreamResult) Progress() uint8
- func (r *EMLParserStreamResult) Stage() ParseProgressStage
- func (r *EMLParserStreamResult) String() string
- func (r *EMLParserStreamResult) SubResult() StreamResult
- type EMLStreamResultIterator
- type ErrMimeTypeNotSupported
- type GIFParser
- type ImageParserResult
- type ImageParserStreamResult
- func (r *ImageParserStreamResult) Error() error
- func (r *ImageParserStreamResult) Path() string
- func (r *ImageParserStreamResult) Progress() uint8
- func (r *ImageParserStreamResult) Stage() ParseProgressStage
- func (r *ImageParserStreamResult) String() string
- func (r *ImageParserStreamResult) SubResult() StreamResult
- type ImageStreamResultIterator
- type JPEGParser
- type PDFParser
- type PDFParserResult
- type PDFParserStreamResult
- func (r *PDFParserStreamResult) Error() error
- func (r *PDFParserStreamResult) Path() string
- func (r *PDFParserStreamResult) Progress() uint8
- func (r *PDFParserStreamResult) Stage() ParseProgressStage
- func (r *PDFParserStreamResult) String() string
- func (r *PDFParserStreamResult) SubResult() StreamResult
- type PDFStreamResultIterator
- type PNGParser
- type ParseProgressStage
- type Parser
- type RAWBGRAParser
- type Result
- type StreamResult
- type StreamResultIterator
- type TARParser
- type TARParserResult
- type TARParserStreamResult
- func (r *TARParserStreamResult) Error() error
- func (r *TARParserStreamResult) Path() string
- func (r *TARParserStreamResult) Progress() uint8
- func (r *TARParserStreamResult) Stage() ParseProgressStage
- func (r *TARParserStreamResult) String() string
- func (r *TARParserStreamResult) SubResult() StreamResult
- type TARStreamResultIterator
- type TiffParser
- type WebPParser
Constants ¶
View Source
const FeaturePDFEnabled = false
Variables ¶
View Source
var ErrBadFile = errors.New("bad file or corrupted")
View Source
var ErrParserDisabled = errors.New("parser disabled")
Functions ¶
This section is empty.
Types ¶
type BMPParser ¶
type BMPParser struct {
// contains filtered or unexported fields
}
Parses `image/bmp` files
func NewBMPParser ¶
func (*BMPParser) ParseStream ¶ added in v0.0.10
func (*BMPParser) SupportedMimeTypes ¶
type CompositeParser ¶
type CompositeParser struct {
// contains filtered or unexported fields
}
func NewCompositeParser ¶
func NewCompositeParser(parsers ...Parser) *CompositeParser
func (*CompositeParser) AddParsers ¶
func (p *CompositeParser) AddParsers(parsers ...Parser)
func (*CompositeParser) ParseStream ¶ added in v0.0.10
func (p *CompositeParser) ParseStream(ctx context.Context, file io.Reader, path string) StreamResultIterator
func (*CompositeParser) SupportedMimeTypes ¶
func (p *CompositeParser) SupportedMimeTypes() []string
type CompositeParserResult ¶
type CompositeParserResult struct { FullPath string `json:"path"` Err error `json:"error"` MimeType string `json:"mimeType"` Inner Result `json:"inner"` }
func (*CompositeParserResult) Error ¶
func (r *CompositeParserResult) Error() error
func (*CompositeParserResult) Path ¶ added in v0.0.10
func (r *CompositeParserResult) Path() string
func (*CompositeParserResult) String ¶
func (r *CompositeParserResult) String() string
func (*CompositeParserResult) Subfiles ¶ added in v0.0.10
func (r *CompositeParserResult) Subfiles() []Result
type CompositeParserStreamResult ¶ added in v0.0.10
type CompositeParserStreamResult struct { FullPath string `json:"path"` Text string `json:"text"` MimeType string `json:"mimeType"` Inner StreamResult `json:"inner"` CurrentStage ParseProgressStage `json:"stage"` CurrentProgress uint8 `json:"progress"` Err error `json:"error"` }
func (*CompositeParserStreamResult) Error ¶ added in v0.0.10
func (r *CompositeParserStreamResult) Error() error
func (*CompositeParserStreamResult) Path ¶ added in v0.0.10
func (r *CompositeParserStreamResult) Path() string
func (*CompositeParserStreamResult) Progress ¶ added in v0.0.10
func (r *CompositeParserStreamResult) Progress() uint8
func (*CompositeParserStreamResult) Stage ¶ added in v0.0.10
func (r *CompositeParserStreamResult) Stage() ParseProgressStage
func (*CompositeParserStreamResult) String ¶ added in v0.0.10
func (r *CompositeParserStreamResult) String() string
func (*CompositeParserStreamResult) SubResult ¶ added in v0.0.10
func (r *CompositeParserStreamResult) SubResult() StreamResult
type CompositeStreamResultIterator ¶ added in v0.0.18
type CompositeStreamResultIterator struct {
// contains filtered or unexported fields
}
func (*CompositeStreamResultIterator) Close ¶ added in v0.0.18
func (i *CompositeStreamResultIterator) Close()
func (*CompositeStreamResultIterator) Current ¶ added in v0.0.18
func (i *CompositeStreamResultIterator) Current() StreamResult
type EMLParser ¶ added in v0.0.10
type EMLParser struct {
// contains filtered or unexported fields
}
Parses `message/rfc822` files (.eml)
func NewEMLParser ¶ added in v0.0.10
func (*EMLParser) ParseStream ¶ added in v0.0.10
func (*EMLParser) SupportedMimeTypes ¶ added in v0.0.10
type EMLParserResult ¶ added in v0.0.10
type EMLParserResult struct { FullPath string `json:"path"` Headers map[string][]string `json:"headers"` Text string `json:"text"` Err error `json:"error"` Attachments []Result `json:"attachments"` }
func (*EMLParserResult) Error ¶ added in v0.0.10
func (r *EMLParserResult) Error() error
func (*EMLParserResult) Path ¶ added in v0.0.10
func (r *EMLParserResult) Path() string
func (*EMLParserResult) String ¶ added in v0.0.10
func (r *EMLParserResult) String() string
func (*EMLParserResult) Subfiles ¶ added in v0.0.10
func (r *EMLParserResult) Subfiles() []Result
type EMLParserStreamResult ¶ added in v0.0.10
type EMLParserStreamResult struct { FullPath string `json:"path"` Text string `json:"text"` CurrentStage ParseProgressStage `json:"stage"` Headers map[string][]string `json:"headers"` CurrentPartHeader mail.PartHeader `json:"subResultHeader"` CurrentPart StreamResult `json:"subResult"` Err error `json:"error"` }
func (*EMLParserStreamResult) Error ¶ added in v0.0.10
func (r *EMLParserStreamResult) Error() error
func (*EMLParserStreamResult) Path ¶ added in v0.0.10
func (r *EMLParserStreamResult) Path() string
func (*EMLParserStreamResult) Progress ¶ added in v0.0.10
func (r *EMLParserStreamResult) Progress() uint8
func (*EMLParserStreamResult) Stage ¶ added in v0.0.10
func (r *EMLParserStreamResult) Stage() ParseProgressStage
func (*EMLParserStreamResult) String ¶ added in v0.0.10
func (r *EMLParserStreamResult) String() string
func (*EMLParserStreamResult) SubResult ¶ added in v0.0.10
func (r *EMLParserStreamResult) SubResult() StreamResult
type EMLStreamResultIterator ¶ added in v0.0.18
type EMLStreamResultIterator struct {
// contains filtered or unexported fields
}
func (*EMLStreamResultIterator) Close ¶ added in v0.0.18
func (i *EMLStreamResultIterator) Close()
func (*EMLStreamResultIterator) Current ¶ added in v0.0.18
func (i *EMLStreamResultIterator) Current() StreamResult
type ErrMimeTypeNotSupported ¶
func (*ErrMimeTypeNotSupported) Error ¶
func (e *ErrMimeTypeNotSupported) Error() string
type GIFParser ¶
type GIFParser struct {
// contains filtered or unexported fields
}
Parses `image/gif` files. Only decodes first frame
func NewGIFParser ¶
func (*GIFParser) ParseStream ¶ added in v0.0.10
func (*GIFParser) SupportedMimeTypes ¶
type ImageParserResult ¶ added in v0.0.18
type ImageParserResult struct { FullPath string `json:"path"` Text string `json:"text"` Err error `json:"error"` }
func (*ImageParserResult) Error ¶ added in v0.0.18
func (r *ImageParserResult) Error() error
func (*ImageParserResult) Path ¶ added in v0.0.18
func (r *ImageParserResult) Path() string
func (*ImageParserResult) String ¶ added in v0.0.18
func (r *ImageParserResult) String() string
func (*ImageParserResult) Subfiles ¶ added in v0.0.18
func (r *ImageParserResult) Subfiles() []Result
type ImageParserStreamResult ¶ added in v0.0.18
type ImageParserStreamResult struct { FullPath string `json:"path"` Text string `json:"text"` CurrentStage ParseProgressStage `json:"stage"` CurrentProgress uint8 `json:"progress"` Err error `json:"error"` }
func (*ImageParserStreamResult) Error ¶ added in v0.0.18
func (r *ImageParserStreamResult) Error() error
func (*ImageParserStreamResult) Path ¶ added in v0.0.18
func (r *ImageParserStreamResult) Path() string
func (*ImageParserStreamResult) Progress ¶ added in v0.0.18
func (r *ImageParserStreamResult) Progress() uint8
func (*ImageParserStreamResult) Stage ¶ added in v0.0.18
func (r *ImageParserStreamResult) Stage() ParseProgressStage
func (*ImageParserStreamResult) String ¶ added in v0.0.18
func (r *ImageParserStreamResult) String() string
func (*ImageParserStreamResult) SubResult ¶ added in v0.0.18
func (r *ImageParserStreamResult) SubResult() StreamResult
type ImageStreamResultIterator ¶ added in v0.0.18
type ImageStreamResultIterator struct {
// contains filtered or unexported fields
}
func (*ImageStreamResultIterator) Close ¶ added in v0.0.18
func (i *ImageStreamResultIterator) Close()
func (*ImageStreamResultIterator) Current ¶ added in v0.0.18
func (i *ImageStreamResultIterator) Current() StreamResult
type JPEGParser ¶
type JPEGParser struct {
// contains filtered or unexported fields
}
Parses `image/jpeg` files
func NewJPEGParser ¶
func NewJPEGParser(ocrProvider ocr.Provider) *JPEGParser
func (*JPEGParser) ParseStream ¶ added in v0.0.10
func (p *JPEGParser) ParseStream(ctx context.Context, file io.Reader, path string) StreamResultIterator
func (*JPEGParser) SupportedMimeTypes ¶
func (p *JPEGParser) SupportedMimeTypes() []string
type PDFParser ¶
type PDFParser struct { }
Parses `application/pdf` files
func NewPDFParser ¶
func (*PDFParser) ParseStream ¶ added in v0.0.10
func (*PDFParser) SupportedMimeTypes ¶
type PDFParserResult ¶
type PDFParserResult struct { FullPath string `json:"path"` Metadata string `json:"metadata"` Pages []string `json:"pages"` Err error `json:"error"` }
func (*PDFParserResult) Error ¶
func (r *PDFParserResult) Error() error
func (*PDFParserResult) Path ¶ added in v0.0.10
func (r *PDFParserResult) Path() string
func (*PDFParserResult) String ¶
func (r *PDFParserResult) String() string
func (*PDFParserResult) Subfiles ¶ added in v0.0.10
func (r *PDFParserResult) Subfiles() []Result
type PDFParserStreamResult ¶ added in v0.0.10
type PDFParserStreamResult struct { FullPath string `json:"path"` CurrentStage ParseProgressStage `json:"stage"` CurrentProgress uint8 `json:"progress"` Text string `json:"text"` Err error `json:"error"` }
func (*PDFParserStreamResult) Error ¶ added in v0.0.10
func (r *PDFParserStreamResult) Error() error
func (*PDFParserStreamResult) Path ¶ added in v0.0.10
func (r *PDFParserStreamResult) Path() string
func (*PDFParserStreamResult) Progress ¶ added in v0.0.10
func (r *PDFParserStreamResult) Progress() uint8
func (*PDFParserStreamResult) Stage ¶ added in v0.0.10
func (r *PDFParserStreamResult) Stage() ParseProgressStage
func (*PDFParserStreamResult) String ¶ added in v0.0.10
func (r *PDFParserStreamResult) String() string
func (*PDFParserStreamResult) SubResult ¶ added in v0.0.10
func (r *PDFParserStreamResult) SubResult() StreamResult
type PDFStreamResultIterator ¶ added in v0.0.18
type PDFStreamResultIterator struct {
// contains filtered or unexported fields
}
func (*PDFStreamResultIterator) Close ¶ added in v0.0.18
func (i *PDFStreamResultIterator) Close()
func (*PDFStreamResultIterator) Current ¶ added in v0.0.18
func (i *PDFStreamResultIterator) Current() StreamResult
type PNGParser ¶
type PNGParser struct {
// contains filtered or unexported fields
}
Parses `image/png` files
func NewPNGParser ¶
func (*PNGParser) ParseStream ¶ added in v0.0.10
func (*PNGParser) SupportedMimeTypes ¶
type ParseProgressStage ¶ added in v0.0.10
type ParseProgressStage string
const ProgressCompleted ParseProgressStage = "COMPLETED"
Raised at the end of file parsing.
const ProgressNew ParseProgressStage = "NEW"
const ProgressUpdate ParseProgressStage = "UPDATE"
Indicates a progress update while the file is still being parsed.
type Parser ¶
type Parser interface { // Returns the list of mime types supported by this parser SupportedMimeTypes() []string // Parse file. Thread safe. Use path to track subfiles or use file name as hint for mime type detection. Parse(ctx context.Context, file io.Reader, path string) Result // Parse file. Thread safe. Use path to track subfiles or use file name as hint for mime type detection. Returns an iterator that streams results. ParseStream(ctx context.Context, file io.Reader, path string) StreamResultIterator }
type RAWBGRAParser ¶ added in v0.0.8
type RAWBGRAParser struct {
// contains filtered or unexported fields
}
Parses internal `image/file2llm-raw-bgra` streams
func NewRAWBGRAParser ¶ added in v0.0.8
func NewRAWBGRAParser(ocrProvider ocr.Provider) *RAWBGRAParser
func (*RAWBGRAParser) ParseStream ¶ added in v0.0.10
func (p *RAWBGRAParser) ParseStream(ctx context.Context, file io.Reader, path string) StreamResultIterator
func (*RAWBGRAParser) SupportedMimeTypes ¶ added in v0.0.8
func (p *RAWBGRAParser) SupportedMimeTypes() []string
type Result ¶
type Result interface { // Get full path to the file Path() string // Convert entire result to LLM readable string String() string // Non-nil if there was an error Error() error // Parsed subfiles. For example files inside archives Subfiles() []Result }
Parsing result
type StreamResult ¶ added in v0.0.10
type StreamResult interface { // Get full path to the file Path() string // Current file processing progress Stage() ParseProgressStage // Progress in percents from 0 to 100 Progress() uint8 // Underlying result SubResult() StreamResult // Convert entire result to LLM readable string String() string // Non-nil if there was an error Error() error }
type StreamResultIterator ¶ added in v0.0.18
type StreamResultIterator interface { // Blocks until the next stream result is available or the context is done. Returns false if no result is available. Next(ctx context.Context) bool // Returns the current stream result Current() StreamResult // Frees all the associated resources Close() }
type TARParser ¶ added in v0.0.10
type TARParser struct {
// contains filtered or unexported fields
}
func NewTARParser ¶ added in v0.0.10
func (*TARParser) ParseStream ¶ added in v0.0.10
func (*TARParser) SupportedMimeTypes ¶ added in v0.0.10
type TARParserResult ¶ added in v0.0.10
type TARParserResult struct { FullPath string `json:"path"` SubfilesResults []Result `json:"subfiles"` Err error `json:"error"` }
func (*TARParserResult) Error ¶ added in v0.0.10
func (r *TARParserResult) Error() error
func (*TARParserResult) Path ¶ added in v0.0.10
func (r *TARParserResult) Path() string
func (*TARParserResult) String ¶ added in v0.0.10
func (r *TARParserResult) String() string
func (*TARParserResult) Subfiles ¶ added in v0.0.10
func (r *TARParserResult) Subfiles() []Result
type TARParserStreamResult ¶ added in v0.0.10
type TARParserStreamResult struct { FullPath string `json:"path"` CurrentStage ParseProgressStage `json:"stage"` CurrentSubfile StreamResult `json:"subResult"` Err error `json:"error"` }
func (*TARParserStreamResult) Error ¶ added in v0.0.10
func (r *TARParserStreamResult) Error() error
func (*TARParserStreamResult) Path ¶ added in v0.0.10
func (r *TARParserStreamResult) Path() string
func (*TARParserStreamResult) Progress ¶ added in v0.0.10
func (r *TARParserStreamResult) Progress() uint8
func (*TARParserStreamResult) Stage ¶ added in v0.0.10
func (r *TARParserStreamResult) Stage() ParseProgressStage
func (*TARParserStreamResult) String ¶ added in v0.0.10
func (r *TARParserStreamResult) String() string
func (*TARParserStreamResult) SubResult ¶ added in v0.0.10
func (r *TARParserStreamResult) SubResult() StreamResult
type TARStreamResultIterator ¶ added in v0.0.18
type TARStreamResultIterator struct {
// contains filtered or unexported fields
}
func (*TARStreamResultIterator) Close ¶ added in v0.0.18
func (i *TARStreamResultIterator) Close()
func (*TARStreamResultIterator) Current ¶ added in v0.0.18
func (i *TARStreamResultIterator) Current() StreamResult
type TiffParser ¶
type TiffParser struct {
// contains filtered or unexported fields
}
Parses `image/tiff` files
func NewTiffParser ¶
func NewTiffParser(ocrProvider ocr.Provider) *TiffParser
func (*TiffParser) ParseStream ¶ added in v0.0.10
func (p *TiffParser) ParseStream(ctx context.Context, file io.Reader, path string) StreamResultIterator
func (*TiffParser) SupportedMimeTypes ¶
func (p *TiffParser) SupportedMimeTypes() []string
type WebPParser ¶
type WebPParser struct {
// contains filtered or unexported fields
}
Parses `image/webp` files
func NewWebPParser ¶
func NewWebPParser(ocrProvider ocr.Provider) *WebPParser
func (*WebPParser) ParseStream ¶ added in v0.0.10
func (p *WebPParser) ParseStream(ctx context.Context, file io.Reader, path string) StreamResultIterator
func (*WebPParser) SupportedMimeTypes ¶
func (p *WebPParser) SupportedMimeTypes() []string
Source Files
¶
Click to show internal directories.
Click to hide internal directories.