Versions in this module Expand all Collapse all v0 v0.1.1 Apr 12, 2026 Changes in this version + const FileTypeCSV + const FileTypeDOC + const FileTypeDOCX + const FileTypeEML + const FileTypeHTML + const FileTypeJPEG + const FileTypeJPG + const FileTypeJSON + const FileTypeMD + const FileTypePDF + const FileTypePNG + const FileTypePPTX + const FileTypeRTF + const FileTypeTXT + const FileTypeUnknown + const FileTypeXLSX + const FileTypeYAML + const FileTypeYML + var ErrEmptyInput = errors.New("empty input") + var ErrUnsupportedFileType = errors.New("unsupported file type") + func DetectFileType(req *ParseRequest) string + type CSVParser struct + func (p *CSVParser) Parse(ctx context.Context, req *ParseRequest, opts *ParseOptions) (*ParseResult, error) + func (p *CSVParser) Provider() string + func (p *CSVParser) SupportedTypes() []string + type DOCParser struct + func (p *DOCParser) Parse(ctx context.Context, req *ParseRequest, opts *ParseOptions) (*ParseResult, error) + func (p *DOCParser) Provider() string + func (p *DOCParser) SupportedTypes() []string + type DOCXParser struct + func (p *DOCXParser) Parse(ctx context.Context, req *ParseRequest, opts *ParseOptions) (*ParseResult, error) + func (p *DOCXParser) Provider() string + func (p *DOCXParser) SupportedTypes() []string + type EMLParser struct + func (p *EMLParser) Parse(ctx context.Context, req *ParseRequest, opts *ParseOptions) (*ParseResult, error) + func (p *EMLParser) Provider() string + func (p *EMLParser) SupportedTypes() []string + type HTMLParser struct + func (p *HTMLParser) Parse(ctx context.Context, req *ParseRequest, opts *ParseOptions) (*ParseResult, error) + func (p *HTMLParser) Provider() string + func (p *HTMLParser) SupportedTypes() []string + type JSONParser struct + func (p *JSONParser) Parse(ctx context.Context, req *ParseRequest, opts *ParseOptions) (*ParseResult, error) + func (p *JSONParser) Provider() string + func (p *JSONParser) SupportedTypes() []string + type OCRParser struct + Language string + func (p *OCRParser) Parse(ctx context.Context, req *ParseRequest, opts *ParseOptions) (*ParseResult, error) + func (p *OCRParser) Provider() string + func (p *OCRParser) SupportedTypes() []string + type PDFParser struct + func (p *PDFParser) Parse(ctx context.Context, req *ParseRequest, opts *ParseOptions) (*ParseResult, error) + func (p *PDFParser) Provider() string + func (p *PDFParser) SupportedTypes() []string + type PPTXParser struct + func (p *PPTXParser) Parse(ctx context.Context, req *ParseRequest, opts *ParseOptions) (*ParseResult, error) + func (p *PPTXParser) Provider() string + func (p *PPTXParser) SupportedTypes() []string + type ParseOptions struct + IncludeHidden bool + IncludeTables bool + MaxTextLength int + PreserveLineBreaks bool + type ParseRequest struct + Content []byte + ContentType string + FileName string + FileType string + Metadata map[string]any + Path string + Reader io.Reader + type ParseResult struct + FileName string + FileType string + Metadata map[string]any + ParsedAt time.Time + Sections []Section + Text string + func ParseAuto(ctx context.Context, req *ParseRequest, opts *ParseOptions) (*ParseResult, error) + func ParseBytes(ctx context.Context, fileName string, content []byte, opts *ParseOptions) (*ParseResult, error) + func ParsePath(ctx context.Context, path string, opts *ParseOptions) (*ParseResult, error) + type Parser interface + Parse func(ctx context.Context, req *ParseRequest, opts *ParseOptions) (*ParseResult, error) + Provider func() string + SupportedTypes func() []string + type RTFParser struct + func (p *RTFParser) Parse(ctx context.Context, req *ParseRequest, opts *ParseOptions) (*ParseResult, error) + func (p *RTFParser) Provider() string + func (p *RTFParser) SupportedTypes() []string + type Router struct + func DefaultRouter() *Router + func NewRouter(parsers ...Parser) *Router + func (r *Router) Parse(ctx context.Context, req *ParseRequest, opts *ParseOptions) (*ParseResult, error) + func (r *Router) Register(p Parser) error + type Section struct + Index int + Metadata map[string]any + Text string + Title string + Type SectionType + type SectionType string + const SectionTypeDocument + const SectionTypePage + const SectionTypeSheet + const SectionTypeSlide + const SectionTypeUnknown + type TXTParser struct + func (p *TXTParser) Parse(ctx context.Context, req *ParseRequest, opts *ParseOptions) (*ParseResult, error) + func (p *TXTParser) Provider() string + func (p *TXTParser) SupportedTypes() []string + type XLSXParser struct + func (p *XLSXParser) Parse(ctx context.Context, req *ParseRequest, opts *ParseOptions) (*ParseResult, error) + func (p *XLSXParser) Provider() string + func (p *XLSXParser) SupportedTypes() []string + type YAMLParser struct + func (p *YAMLParser) Parse(ctx context.Context, req *ParseRequest, opts *ParseOptions) (*ParseResult, error) + func (p *YAMLParser) Provider() string + func (p *YAMLParser) SupportedTypes() []string