Versions in this module Expand all Collapse all v0 v0.1.3 May 11, 2026 Changes in this version + type AdaptiveThresholds struct + HorizontalThreshold float64 + VerticalThreshold float64 + type Alignment int + const AlignmentCenter + const AlignmentJustified + const AlignmentLeft + const AlignmentRight + type Block struct + Box Rect + LineIndices []int + Segments []Segment + type CellBBox struct + Bottom float64 + Top float64 + X0 float64 + X1 float64 + type Chunk struct + EndPage int + HeadingPath []HeadingContext + Index int + StartPage int + Text string + TokenCount int + type ChunkConfig struct + EstimateTokens func(s string) int + MaxTokens int + OverlapTokens int + RepeatHeadings bool + func DefaultChunkConfig() ChunkConfig + type Column struct + Box Rect + Index int + Paragraphs []Paragraph + Words []EnrichedWord + type Config struct + DetectTables bool + EnableMetricsLogging bool + IncludePageBreaks bool + MaxConcurrency int + MinHeadingFontSize float64 + TableSettings TableSettings + UseAdaptiveThresholds bool + UseSegmentBasedTables bool + func DefaultConfig() Config + type Converter struct + func New() (*Converter, error) + func NewConverter(instance pdfium.Pdfium) *Converter + func NewConverterWithConfig(instance pdfium.Pdfium, config Config) *Converter + func NewWithConfig(config Config) (*Converter, error) + func (c *Converter) Close() + func (c *Converter) ConvertBytes(pdfBytes []byte) (string, error) + func (c *Converter) ConvertBytesChunks(pdfBytes []byte, cc ChunkConfig) ([]Chunk, error) + func (c *Converter) ConvertFile(filePath string) (string, error) + func (c *Converter) ConvertFileChunks(filePath string, cc ChunkConfig) ([]Chunk, error) + func (c *Converter) ConvertFileWithMetrics(filePath string) (string, ProcessingMetrics, error) + func (c *Converter) ConvertPageRange(filePath string, startPage, endPage int) (string, error) + func (c *Converter) ConvertReader(reader io.ReadSeeker) (string, error) + func (c *Converter) GetDocumentInfo(filePath string) (*DocumentInfo, error) + type Document struct + Pages []Page + Stats DocumentStats + func (d *Document) ToChunks(config Config, cc ChunkConfig) []Chunk + func (d *Document) ToMarkdown(config Config) string + type DocumentInfo struct + PageCount int + type DocumentStatistics struct + TotalCharacters int + TotalHeadings int + TotalPages int + TotalParagraphs int + TotalTables int + TotalWords int + type DocumentStats struct + FontNameFreq map[string]int + FontSizeFreq map[float64]int + MaxFontSize float64 + MostUsedFontName string + MostUsedFontSize float64 + MostUsedLineGap float64 + type Edge struct + Bottom float64 + Height float64 + Orientation string + Top float64 + Width float64 + X0 float64 + X1 float64 + type EnrichedChar struct + Angle float32 + Box Rect + FillColor RGBA + FontFlags int + FontName string + FontSize float64 + FontWeight int + IsHyphen bool + Text rune + type EnrichedWord struct + Baseline float64 + Box Rect + FillColor RGBA + FontFlags int + FontName string + FontSize float64 + FontWeight int + IsBold bool + IsItalic bool + IsMonospace bool + Rotation float64 + Text string + XHeight float64 + func (w EnrichedWord) IsBulletOrNumber() bool + type HeadingContext struct + Level int + Page int + Text string + type Line struct + Baseline float64 + Box Rect + Words []EnrichedWord + type LineType string + const TableLine + const TextLine + const UnknownLine + type Page struct + Columns []Column + Height float64 + Lines []Edge + Number int + Paragraphs []Paragraph + Quality PageQuality + Tables []Table + Width float64 + func ExtractPage(instance pdfium.Pdfium, page references.FPDF_PAGE, pageNumber int, ...) (*Page, error) + func (p *Page) ToMarkdown() string + type PageExtractor struct + type PageMetrics struct + Duration time.Duration + PageNumber int + type PageQuality struct + AlnumRatio float64 + CharCount int + FragmentedWordRatio float64 + IsLowQuality bool + MeaningfulWordRatio float64 + NonWhitespaceCount int + PUARatio float64 + ReplacementCharRatio float64 + WordCount int + type Paragraph struct + Alignment Alignment + Box Rect + HeadingLevel int + Indent float64 + IsCode bool + IsHeading bool + IsList bool + Lines []Line + func (p Paragraph) CenterX() float64 + func (p Paragraph) Text() string + type Point struct + X float64 + Y float64 + type ProcessingMetrics struct + DocumentOpen time.Duration + PageExtractions []PageMetrics + Statistics DocumentStatistics + TotalTime time.Duration + type RGBA struct + A uint + B uint + G uint + R uint + type Rect struct + X0 float64 + X1 float64 + Y0 float64 + Y1 float64 + func (r Rect) CenterX() float64 + func (r Rect) CenterY() float64 + func (r Rect) Height() float64 + func (r Rect) Width() float64 + type Segment struct + Box Rect + Words []EnrichedWord + type SegmentTableCell struct + Box Rect + Column int + Content string + Row int + type SegmentTableRow struct + Box Rect + Lines []TaggedLine + Segments []Segment + type Table struct + BBox CellBBox + Cells []CellBBox + NumCols int + NumRows int + Rows []TableRow + func DetectTables(page *Page, settings TableSettings) []Table + func DetectTablesSegmentBased(page *Page, thresholds AdaptiveThresholds) []Table + type TableArea struct + Box Rect + Lines []TaggedLine + type TableCell struct + BBox CellBBox + Content string + Words []EnrichedWord + type TableColumn struct + Box Rect + Segments []Segment + type TableRow struct + BBox CellBBox + Cells []TableCell + type TableSettings struct + EdgeMinLength float64 + HorizontalStrategy string + IntersectionTolerance float64 + IntersectionXTolerance float64 + IntersectionYTolerance float64 + JoinTolerance float64 + JoinXTolerance float64 + JoinYTolerance float64 + MinWordsHorizontal int + MinWordsVertical int + SnapTolerance float64 + SnapXTolerance float64 + SnapYTolerance float64 + VerticalStrategy string + func DefaultTableSettings() TableSettings + type TaggedLine struct + Line Line + Segments []Segment + Type LineType + type TextBlock struct + Lines []Line + ReadingDirection string + Rotation float64 + Words []EnrichedWord v0.1.2 Feb 17, 2026 v0.1.1 Feb 17, 2026 v0.1.0 Feb 17, 2026 v0.0.5 Feb 17, 2026 v0.0.4 Feb 17, 2026