parser

package
v0.0.7 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: May 6, 2025 License: AGPL-3.0 Imports: 14 Imported by: 0

Documentation

Index

Constants

View Source
const FeaturePDFEnabled = false

Variables

View Source
var ErrBadFile = errors.New("bad file or corrupted")
View Source
var ErrParserDisabled = errors.New("parser disabled")

Functions

This section is empty.

Types

type BMPParser

type BMPParser struct {
	// contains filtered or unexported fields
}

Parses `image/bmp` files

func NewBMPParser

func NewBMPParser(ocrProvider ocr.Provider) *BMPParser

func (*BMPParser) Parse

func (p *BMPParser) Parse(ctx context.Context, file io.Reader) Result

func (*BMPParser) SupportedMimeTypes

func (p *BMPParser) SupportedMimeTypes() []string

type BMPParserResult

type BMPParserResult struct {
	Text string `json:"text"`
	Err  error  `json:"error"`
}

func (*BMPParserResult) Componets

func (r *BMPParserResult) Componets() []Result

func (*BMPParserResult) Error

func (r *BMPParserResult) Error() error

func (*BMPParserResult) String

func (r *BMPParserResult) String() string

type CompositeParser

type CompositeParser struct {
	// contains filtered or unexported fields
}

func NewCompositeParser

func NewCompositeParser(parsers ...Parser) *CompositeParser

func (*CompositeParser) AddParsers

func (p *CompositeParser) AddParsers(parsers ...Parser)

func (*CompositeParser) Parse

func (p *CompositeParser) Parse(ctx context.Context, file io.Reader) Result

func (*CompositeParser) SupportedMimeTypes

func (p *CompositeParser) SupportedMimeTypes() []string

type CompositeParserResult

type CompositeParserResult struct {
	Err      error  `json:"error"`
	MimeType string `json:"mimeType"`
	Inner    Result `json:"inner"`
}

func (*CompositeParserResult) Componets

func (r *CompositeParserResult) Componets() []Result

func (*CompositeParserResult) Error

func (r *CompositeParserResult) Error() error

func (*CompositeParserResult) String

func (r *CompositeParserResult) String() string

type ErrMimeTypeNotSupported

type ErrMimeTypeNotSupported struct {
	MimeType *mimetype.MIME
}

func (*ErrMimeTypeNotSupported) Error

func (e *ErrMimeTypeNotSupported) Error() string

type GIFParser

type GIFParser struct {
	// contains filtered or unexported fields
}

Parses `image/gif` files. Only the first frame is decoded.

func NewGIFParser

func NewGIFParser(ocrProvider ocr.Provider) *GIFParser

func (*GIFParser) Parse

func (p *GIFParser) Parse(ctx context.Context, file io.Reader) Result

func (*GIFParser) SupportedMimeTypes

func (p *GIFParser) SupportedMimeTypes() []string

type GIFParserResult

type GIFParserResult struct {
	Text string `json:"text"`
	Err  error  `json:"error"`
}

func (*GIFParserResult) Componets

func (r *GIFParserResult) Componets() []Result

func (*GIFParserResult) Error

func (r *GIFParserResult) Error() error

func (*GIFParserResult) String

func (r *GIFParserResult) String() string

type JPEGParser

type JPEGParser struct {
	// contains filtered or unexported fields
}

Parses `image/jpeg` files

func NewJPEGParser

func NewJPEGParser(ocrProvider ocr.Provider) *JPEGParser

func (*JPEGParser) Parse

func (p *JPEGParser) Parse(ctx context.Context, file io.Reader) Result

func (*JPEGParser) SupportedMimeTypes

func (p *JPEGParser) SupportedMimeTypes() []string

type JPEGParserResult

type JPEGParserResult struct {
	Text string `json:"text"`
	Err  error  `json:"error"`
}

func (*JPEGParserResult) Componets

func (r *JPEGParserResult) Componets() []Result

func (*JPEGParserResult) Error

func (r *JPEGParserResult) Error() error

func (*JPEGParserResult) String

func (r *JPEGParserResult) String() string

type PDFParser

type PDFParser struct {
}

Parses `application/pdf` files

func NewPDFParser

func NewPDFParser(innerParser Parser) *PDFParser

func (*PDFParser) Parse

func (p *PDFParser) Parse(ctx context.Context, file io.Reader) Result

func (*PDFParser) SupportedMimeTypes

func (p *PDFParser) SupportedMimeTypes() []string

type PDFParserResult

type PDFParserResult struct {
	Metadata string                `json:"metadata"`
	Pages    []PDFParserResultPage `json:"pages"`
	Err      error                 `json:"error"`
}

func (*PDFParserResult) Componets

func (r *PDFParserResult) Componets() []Result

func (*PDFParserResult) Error

func (r *PDFParserResult) Error() error

func (*PDFParserResult) String

func (r *PDFParserResult) String() string

type PDFParserResultPage

type PDFParserResultPage struct {
	Text   string   `json:"text"`
	Images []Result `json:"images"`
}

type PNGParser

type PNGParser struct {
	// contains filtered or unexported fields
}

Parses `image/png` files

func NewPNGParser

func NewPNGParser(ocrProvider ocr.Provider) *PNGParser

func (*PNGParser) Parse

func (p *PNGParser) Parse(ctx context.Context, file io.Reader) Result

func (*PNGParser) SupportedMimeTypes

func (p *PNGParser) SupportedMimeTypes() []string

type PNGParserResult

type PNGParserResult struct {
	Text string `json:"text"`
	Err  error  `json:"error"`
}

func (*PNGParserResult) Componets

func (r *PNGParserResult) Componets() []Result

func (*PNGParserResult) Error

func (r *PNGParserResult) Error() error

func (*PNGParserResult) String

func (r *PNGParserResult) String() string

type Parser

type Parser interface {
	// Returns the list of MIME types supported by this parser
	SupportedMimeTypes() []string
	// Parses the file. Thread-safe
	Parse(ctx context.Context, file io.Reader) Result
}

func New

func New(ocrProvider ocr.Provider) Parser

Builds a parser with all possible file types included.

type Result

type Result interface {
	// Converts the entire result to an LLM-readable string
	String() string
	// Non-nil if there was an error
	Error() error
	// Parsed subcomponents, for example images in a PDF or files inside archives
	Componets() []Result
}

Parsing result

type TiffParser

type TiffParser struct {
	// contains filtered or unexported fields
}

Parses `image/tiff` files

func NewTiffParser

func NewTiffParser(ocrProvider ocr.Provider) *TiffParser

func (*TiffParser) Parse

func (p *TiffParser) Parse(ctx context.Context, file io.Reader) Result

func (*TiffParser) SupportedMimeTypes

func (p *TiffParser) SupportedMimeTypes() []string

type TiffParserResult

type TiffParserResult struct {
	Text string `json:"text"`
	Err  error  `json:"error"`
}

func (*TiffParserResult) Componets

func (r *TiffParserResult) Componets() []Result

func (*TiffParserResult) Error

func (r *TiffParserResult) Error() error

func (*TiffParserResult) String

func (r *TiffParserResult) String() string

type WebPParser

type WebPParser struct {
	// contains filtered or unexported fields
}

Parses `image/webp` files

func NewWebPParser

func NewWebPParser(ocrProvider ocr.Provider) *WebPParser

func (*WebPParser) Parse

func (p *WebPParser) Parse(ctx context.Context, file io.Reader) Result

func (*WebPParser) SupportedMimeTypes

func (p *WebPParser) SupportedMimeTypes() []string

type WebPParserResult

type WebPParserResult struct {
	Text string `json:"text"`
	Err  error  `json:"error"`
}

func (*WebPParserResult) Componets

func (r *WebPParserResult) Componets() []Result

func (*WebPParserResult) Error

func (r *WebPParserResult) Error() error

func (*WebPParserResult) String

func (r *WebPParserResult) String() string

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL