parser

package

Version: v0.1.0 (latest) | Published: Feb 4, 2026 | License: Apache-2.0 | Imports: 18 | Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/bbiangul/go-reason

Links

Open Source Insights

Documentation ¶

Index ¶

type ComplexityScore
- func DetectComplexity(path string) (*ComplexityScore, error)
- func (cs *ComplexityScore) IsComplex() bool
type DOCXParser
- func (p *DOCXParser) Parse(ctx context.Context, path string) (*ParseResult, error)
- func (p *DOCXParser) SupportedFormats() []string
type LegacyParser
- func (p *LegacyParser) Parse(ctx context.Context, path string) (*ParseResult, error)
- func (p *LegacyParser) SupportedFormats() []string
type LlamaParseConfig
type LlamaParseParser
- func NewLlamaParseParser(cfg LlamaParseConfig) *LlamaParseParser
- func (p *LlamaParseParser) Parse(ctx context.Context, path string) (*ParseResult, error)
- func (p *LlamaParseParser) SupportedFormats() []string
type PDFParser
- func (p *PDFParser) Parse(ctx context.Context, path string) (*ParseResult, error)
- func (p *PDFParser) SupportedFormats() []string
type PDFVisionParser
- func NewPDFVisionParser(provider llm.VisionProvider) *PDFVisionParser
- func (p *PDFVisionParser) Parse(ctx context.Context, path string) (*ParseResult, error)
- func (p *PDFVisionParser) SupportedFormats() []string
type PPTXParser
- func (p *PPTXParser) Parse(ctx context.Context, path string) (*ParseResult, error)
- func (p *PPTXParser) SupportedFormats() []string
type ParseResult
type Parser
type Registry
- func NewRegistry() *Registry
- func (r *Registry) Get(format string) (Parser, error)
- func (r *Registry) Register(format string, p Parser)
- func (r *Registry) SetLlamaParse(cfg LlamaParseConfig)
type Section
type XLSXParser
- func (p *XLSXParser) Parse(ctx context.Context, path string) (*ParseResult, error)
- func (p *XLSXParser) SupportedFormats() []string

Constants ¶

This section is empty.

Variables ¶

This section is empty.

Functions ¶

This section is empty.

Types ¶

type ComplexityScore ¶

type ComplexityScore struct {
	HasTables   bool
	HasImages   bool
	IsMultiCol  bool
	FontVariety int     // number of distinct fonts
	Score       float64 // 0.0 = simple text, 1.0 = highly complex
}

ComplexityScore represents the structural complexity of a PDF page.

func DetectComplexity ¶

func DetectComplexity(path string) (*ComplexityScore, error)

DetectComplexity analyzes a PDF file for structural complexity.

func (*ComplexityScore) IsComplex ¶

func (cs *ComplexityScore) IsComplex() bool

IsComplex returns true if the PDF should be routed to vision processing.

type DOCXParser ¶

type DOCXParser struct{}

func (*DOCXParser) Parse ¶

func (p *DOCXParser) Parse(ctx context.Context, path string) (*ParseResult, error)

func (*DOCXParser) SupportedFormats ¶

func (p *DOCXParser) SupportedFormats() []string

type LegacyParser ¶

type LegacyParser struct{}

LegacyParser routes legacy binary formats to an external service.

func (*LegacyParser) Parse ¶

func (p *LegacyParser) Parse(ctx context.Context, path string) (*ParseResult, error)

func (*LegacyParser) SupportedFormats ¶

func (p *LegacyParser) SupportedFormats() []string

type LlamaParseConfig ¶

type LlamaParseConfig struct {
	APIKey  string
	BaseURL string
}

type LlamaParseParser ¶

type LlamaParseParser struct {
	// contains filtered or unexported fields
}

func NewLlamaParseParser ¶

func NewLlamaParseParser(cfg LlamaParseConfig) *LlamaParseParser

func (*LlamaParseParser) Parse ¶

func (p *LlamaParseParser) Parse(ctx context.Context, path string) (*ParseResult, error)

func (*LlamaParseParser) SupportedFormats ¶

func (p *LlamaParseParser) SupportedFormats() []string

type PDFParser ¶

type PDFParser struct{}

func (*PDFParser) Parse ¶

func (p *PDFParser) Parse(ctx context.Context, path string) (*ParseResult, error)

func (*PDFParser) SupportedFormats ¶

func (p *PDFParser) SupportedFormats() []string

type PDFVisionParser ¶

type PDFVisionParser struct {
	// contains filtered or unexported fields
}

PDFVisionParser uses a vision LLM to extract text from complex PDF pages (tables, diagrams, multi-column layouts).

func NewPDFVisionParser ¶

func NewPDFVisionParser(provider llm.VisionProvider) *PDFVisionParser

func (*PDFVisionParser) Parse ¶

func (p *PDFVisionParser) Parse(ctx context.Context, path string) (*ParseResult, error)

func (*PDFVisionParser) SupportedFormats ¶

func (p *PDFVisionParser) SupportedFormats() []string

type PPTXParser ¶

type PPTXParser struct{}

func (*PPTXParser) Parse ¶

func (p *PPTXParser) Parse(ctx context.Context, path string) (*ParseResult, error)

func (*PPTXParser) SupportedFormats ¶

func (p *PPTXParser) SupportedFormats() []string

type ParseResult ¶

type ParseResult struct {
	Sections []Section // Ordered sections extracted from the document
	Method   string    // "native", "llamaparse", "vision"
	Metadata map[string]string
}

ParseResult is what a parser produces from a document file.

type Parser ¶

type Parser interface {
	Parse(ctx context.Context, path string) (*ParseResult, error)
	SupportedFormats() []string
}

Parser can parse a specific document format.

type Registry ¶

type Registry struct {
	// contains filtered or unexported fields
}

func NewRegistry ¶

func NewRegistry() *Registry

func (*Registry) Get ¶

func (r *Registry) Get(format string) (Parser, error)

func (*Registry) Register ¶

func (r *Registry) Register(format string, p Parser)

func (*Registry) SetLlamaParse ¶

func (r *Registry) SetLlamaParse(cfg LlamaParseConfig)

type Section ¶

type Section struct {
	Heading    string
	Content    string
	Level      int // Heading level (1=top, 2=sub, etc.)
	PageNumber int
	Type       string // "section", "table", "definition", "requirement", "paragraph"
	Children   []Section
	Metadata   map[string]string
}

Section represents a logical section of a parsed document.

type XLSXParser ¶

type XLSXParser struct{}

func (*XLSXParser) Parse ¶

func (p *XLSXParser) Parse(ctx context.Context, path string) (*ParseResult, error)

func (*XLSXParser) SupportedFormats ¶

func (p *XLSXParser) SupportedFormats() []string

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL