Documentation ¶
Index ¶
- Constants
- Variables
- func DetectFileType(req *ParseRequest) string
- type CSVParser
- type DOCParser
- type DOCXParser
- type EMLParser
- type HTMLParser
- type JSONParser
- type OCRParser
- type PDFParser
- type PPTXParser
- type ParseOptions
- type ParseRequest
- type ParseResult
- type Parser
- type RTFParser
- type Router
- type Section
- type SectionType
- type TXTParser
- type XLSXParser
- type YAMLParser
Constants ¶
View Source
const (
	FileTypeUnknown = "unknown"
	FileTypeTXT     = "txt"
	FileTypeMD      = "md"
	FileTypeCSV     = "csv"
	FileTypeHTML    = "html"
	FileTypeJSON    = "json"
	FileTypeYAML    = "yaml"
	FileTypeYML     = "yml"
	FileTypeEML     = "eml"
	FileTypeRTF     = "rtf"
	FileTypePDF     = "pdf"
	FileTypePNG     = "png"
	FileTypeJPG     = "jpg"
	FileTypeJPEG    = "jpeg"
	FileTypeDOC     = "doc"
	FileTypeDOCX    = "docx"
	FileTypePPTX    = "pptx"
	FileTypeXLSX    = "xlsx"
)
Variables ¶
View Source
var (
	ErrUnsupportedFileType = errors.New("unsupported file type")
	ErrEmptyInput          = errors.New("empty input")
)
Functions ¶
func DetectFileType ¶
func DetectFileType(req *ParseRequest) string
Types ¶
type CSVParser ¶
type CSVParser struct{}
func (*CSVParser) Parse ¶
func (p *CSVParser) Parse(ctx context.Context, req *ParseRequest, opts *ParseOptions) (*ParseResult, error)
func (*CSVParser) SupportedTypes ¶
type DOCParser ¶
type DOCParser struct{}
func (*DOCParser) Parse ¶
func (p *DOCParser) Parse(ctx context.Context, req *ParseRequest, opts *ParseOptions) (*ParseResult, error)
func (*DOCParser) SupportedTypes ¶
type DOCXParser ¶
type DOCXParser struct{}
func (*DOCXParser) Parse ¶
func (p *DOCXParser) Parse(ctx context.Context, req *ParseRequest, opts *ParseOptions) (*ParseResult, error)
func (*DOCXParser) Provider ¶
func (p *DOCXParser) Provider() string
func (*DOCXParser) SupportedTypes ¶
func (p *DOCXParser) SupportedTypes() []string
type EMLParser ¶
type EMLParser struct{}
func (*EMLParser) Parse ¶
func (p *EMLParser) Parse(ctx context.Context, req *ParseRequest, opts *ParseOptions) (*ParseResult, error)
func (*EMLParser) SupportedTypes ¶
type HTMLParser ¶
type HTMLParser struct{}
func (*HTMLParser) Parse ¶
func (p *HTMLParser) Parse(ctx context.Context, req *ParseRequest, opts *ParseOptions) (*ParseResult, error)
func (*HTMLParser) Provider ¶
func (p *HTMLParser) Provider() string
func (*HTMLParser) SupportedTypes ¶
func (p *HTMLParser) SupportedTypes() []string
type JSONParser ¶
type JSONParser struct{}
func (*JSONParser) Parse ¶
func (p *JSONParser) Parse(ctx context.Context, req *ParseRequest, opts *ParseOptions) (*ParseResult, error)
func (*JSONParser) Provider ¶
func (p *JSONParser) Provider() string
func (*JSONParser) SupportedTypes ¶
func (p *JSONParser) SupportedTypes() []string
type OCRParser ¶
type OCRParser struct {
Language string
}
func (*OCRParser) Parse ¶
func (p *OCRParser) Parse(ctx context.Context, req *ParseRequest, opts *ParseOptions) (*ParseResult, error)
func (*OCRParser) SupportedTypes ¶
type PDFParser ¶
type PDFParser struct{}
func (*PDFParser) Parse ¶
func (p *PDFParser) Parse(ctx context.Context, req *ParseRequest, opts *ParseOptions) (*ParseResult, error)
func (*PDFParser) SupportedTypes ¶
type PPTXParser ¶
type PPTXParser struct{}
func (*PPTXParser) Parse ¶
func (p *PPTXParser) Parse(ctx context.Context, req *ParseRequest, opts *ParseOptions) (*ParseResult, error)
func (*PPTXParser) Provider ¶
func (p *PPTXParser) Provider() string
func (*PPTXParser) SupportedTypes ¶
func (p *PPTXParser) SupportedTypes() []string
type ParseOptions ¶
type ParseRequest ¶
type ParseResult ¶
type ParseResult struct {
FileType string
FileName string
Text string
Sections []Section
Metadata map[string]any
ParsedAt time.Time
}
func ParseAuto ¶
func ParseAuto(ctx context.Context, req *ParseRequest, opts *ParseOptions) (*ParseResult, error)
func ParseBytes ¶
func ParseBytes(ctx context.Context, fileName string, content []byte, opts *ParseOptions) (*ParseResult, error)
func ParsePath ¶
func ParsePath(ctx context.Context, path string, opts *ParseOptions) (*ParseResult, error)
type Parser ¶
type Parser interface {
Provider() string
SupportedTypes() []string
Parse(ctx context.Context, req *ParseRequest, opts *ParseOptions) (*ParseResult, error)
}
type RTFParser ¶
type RTFParser struct{}
func (*RTFParser) Parse ¶
func (p *RTFParser) Parse(ctx context.Context, req *ParseRequest, opts *ParseOptions) (*ParseResult, error)
func (*RTFParser) SupportedTypes ¶
type Router ¶
type Router struct {
// contains filtered or unexported fields
}
func DefaultRouter ¶
func DefaultRouter() *Router
func (*Router) Parse ¶
func (r *Router) Parse(ctx context.Context, req *ParseRequest, opts *ParseOptions) (*ParseResult, error)
type SectionType ¶
type SectionType string
const (
	SectionTypeUnknown  SectionType = "unknown"
	SectionTypeDocument SectionType = "document"
	SectionTypePage     SectionType = "page"
	SectionTypeSheet    SectionType = "sheet"
	SectionTypeSlide    SectionType = "slide"
)
type TXTParser ¶
type TXTParser struct{}
func (*TXTParser) Parse ¶
func (p *TXTParser) Parse(ctx context.Context, req *ParseRequest, opts *ParseOptions) (*ParseResult, error)
func (*TXTParser) SupportedTypes ¶
type XLSXParser ¶
type XLSXParser struct{}
func (*XLSXParser) Parse ¶
func (p *XLSXParser) Parse(ctx context.Context, req *ParseRequest, opts *ParseOptions) (*ParseResult, error)
func (*XLSXParser) Provider ¶
func (p *XLSXParser) Provider() string
func (*XLSXParser) SupportedTypes ¶
func (p *XLSXParser) SupportedTypes() []string
type YAMLParser ¶
type YAMLParser struct{}
func (*YAMLParser) Parse ¶
func (p *YAMLParser) Parse(ctx context.Context, req *ParseRequest, opts *ParseOptions) (*ParseResult, error)
func (*YAMLParser) Provider ¶
func (p *YAMLParser) Provider() string
func (*YAMLParser) SupportedTypes ¶
func (p *YAMLParser) SupportedTypes() []string
Click to show internal directories.
Click to hide internal directories.