convert

package

Version: v0.0.5 (latest) · Published: Apr 14, 2026 · License: MIT · Imports: 30 · Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/compozy/kb

Links

Open Source Insights

Documentation ¶

Overview ¶

Package convert provides a converter registry and format-specific converters that transform files (PDF, DOCX, XLSX, PPTX, EPUB, HTML, CSV, JSON, XML, text, images) into Markdown.

Index ¶

func HTMLToMarkdown(htmlContent string) (string, error)
type CSVConverter
- func (CSVConverter) Accepts(ext string, mimeType string) bool
- func (CSVConverter) Convert(ctx context.Context, input models.ConvertInput) (*models.ConvertResult, error)
type DOCXConverter
- func (DOCXConverter) Accepts(ext string, mimeType string) bool
- func (DOCXConverter) Convert(ctx context.Context, input models.ConvertInput) (*models.ConvertResult, error)
type EPUBConverter
- func (EPUBConverter) Accepts(ext string, mimeType string) bool
- func (EPUBConverter) Convert(ctx context.Context, input models.ConvertInput) (*models.ConvertResult, error)
type HTMLConverter
- func (HTMLConverter) Accepts(ext string, mimeType string) bool
- func (HTMLConverter) Convert(ctx context.Context, input models.ConvertInput) (*models.ConvertResult, error)
type ImageConverter
- func (ImageConverter) Accepts(ext string, mimeType string) bool
- func (ImageConverter) Convert(ctx context.Context, input models.ConvertInput) (*models.ConvertResult, error)
type JSONConverter
- func (JSONConverter) Accepts(ext string, mimeType string) bool
- func (JSONConverter) Convert(ctx context.Context, input models.ConvertInput) (*models.ConvertResult, error)
type PDFConverter
- func (PDFConverter) Accepts(ext string, mimeType string) bool
- func (PDFConverter) Convert(ctx context.Context, input models.ConvertInput) (*models.ConvertResult, error)
type PPTXConverter
- func (PPTXConverter) Accepts(ext string, mimeType string) bool
- func (PPTXConverter) Convert(ctx context.Context, input models.ConvertInput) (*models.ConvertResult, error)
type Registry
- func NewRegistry(converters ...models.Converter) *Registry
- func (r *Registry) Convert(ctx context.Context, input models.ConvertInput) (*models.ConvertResult, error)
- func (r *Registry) Match(ext string, mimeType string) models.Converter
- func (r *Registry) Register(converter models.Converter)
type TextConverter
- func (TextConverter) Accepts(ext string, mimeType string) bool
- func (TextConverter) Convert(ctx context.Context, input models.ConvertInput) (*models.ConvertResult, error)
type UnsupportedInputError
- func (e *UnsupportedInputError) Error() string
type XLSXConverter
- func (XLSXConverter) Accepts(ext string, mimeType string) bool
- func (XLSXConverter) Convert(ctx context.Context, input models.ConvertInput) (*models.ConvertResult, error)
type XMLConverter
- func (XMLConverter) Accepts(ext string, mimeType string) bool
- func (XMLConverter) Convert(ctx context.Context, input models.ConvertInput) (*models.ConvertResult, error)

Constants ¶

This section is empty.

Variables ¶

This section is empty.

Functions ¶

func HTMLToMarkdown ¶

func HTMLToMarkdown(htmlContent string) (string, error)

HTMLToMarkdown exposes the shared HTML conversion pipeline so that other converters can reuse it, for example when converting EPUB chapters.

Types ¶

type CSVConverter ¶

type CSVConverter struct{}

CSVConverter renders CSV content as a Markdown table.

func (CSVConverter) Accepts ¶

func (CSVConverter) Accepts(ext string, mimeType string) bool

Accepts reports whether the input is CSV.

func (CSVConverter) Convert ¶

func (CSVConverter) Convert(ctx context.Context, input models.ConvertInput) (*models.ConvertResult, error)

Convert parses CSV rows and writes a Markdown table.

type DOCXConverter ¶

type DOCXConverter struct{}

DOCXConverter renders DOCX documents as Markdown paragraphs and headings.

func (DOCXConverter) Accepts ¶

func (DOCXConverter) Accepts(ext string, mimeType string) bool

Accepts reports whether the input is DOCX content.

func (DOCXConverter) Convert ¶

func (DOCXConverter) Convert(ctx context.Context, input models.ConvertInput) (*models.ConvertResult, error)

Convert transforms a DOCX file into Markdown while preserving paragraphs, heading styles, and core document metadata when present.

type EPUBConverter ¶

type EPUBConverter struct{}

EPUBConverter renders EPUB archives into Markdown by walking the package spine order and converting XHTML chapters through the shared HTML pipeline.

func (EPUBConverter) Accepts ¶

func (EPUBConverter) Accepts(ext string, mimeType string) bool

Accepts reports whether the input is EPUB content.

func (EPUBConverter) Convert ¶

func (EPUBConverter) Convert(ctx context.Context, input models.ConvertInput) (*models.ConvertResult, error)

Convert transforms an EPUB archive into concatenated Markdown chapters while surfacing package metadata from the OPF document.

type HTMLConverter ¶

type HTMLConverter struct{}

HTMLConverter renders HTML documents as Markdown.

func (HTMLConverter) Accepts ¶

func (HTMLConverter) Accepts(ext string, mimeType string) bool

Accepts reports whether the input is HTML content.

func (HTMLConverter) Convert ¶

func (HTMLConverter) Convert(ctx context.Context, input models.ConvertInput) (*models.ConvertResult, error)

Convert transforms HTML input into Markdown and extracts a title from the document metadata or first heading.

type ImageConverter ¶

type ImageConverter struct{}

ImageConverter preserves supported image metadata when OCR support is not compiled in.

func (ImageConverter) Accepts ¶

func (ImageConverter) Accepts(ext string, mimeType string) bool

Accepts reports whether the input is an image handled by the OCR/no-op path.

func (ImageConverter) Convert ¶

func (ImageConverter) Convert(ctx context.Context, input models.ConvertInput) (*models.ConvertResult, error)

Convert returns an empty body and any extracted EXIF metadata.

type JSONConverter ¶

type JSONConverter struct{}

JSONConverter renders JSON content inside a fenced Markdown code block.

func (JSONConverter) Accepts ¶

func (JSONConverter) Accepts(ext string, mimeType string) bool

Accepts reports whether the input is JSON.

func (JSONConverter) Convert ¶

func (JSONConverter) Convert(ctx context.Context, input models.ConvertInput) (*models.ConvertResult, error)

Convert pretty-prints JSON and extracts lightweight metadata from top-level scalar fields.

type PDFConverter ¶

type PDFConverter struct{}

PDFConverter renders PDF documents as Markdown using pdfcpu for metadata and page content extraction.

func (PDFConverter) Accepts ¶

func (PDFConverter) Accepts(ext string, mimeType string) bool

Accepts reports whether the input is PDF content.

func (PDFConverter) Convert ¶

func (PDFConverter) Convert(ctx context.Context, input models.ConvertInput) (*models.ConvertResult, error)

Convert transforms a PDF into Markdown while preserving page boundaries and surfacing document metadata when available.

type PPTXConverter ¶

type PPTXConverter struct{}

PPTXConverter renders PowerPoint presentations as Markdown grouped by slide.

func (PPTXConverter) Accepts ¶

func (PPTXConverter) Accepts(ext string, mimeType string) bool

Accepts reports whether the input is PPTX content.

func (PPTXConverter) Convert ¶

func (PPTXConverter) Convert(ctx context.Context, input models.ConvertInput) (*models.ConvertResult, error)

Convert transforms a PPTX file into Markdown sections grouped by slide and surfaces core document metadata when present.

type Registry ¶

type Registry struct {
	// contains filtered or unexported fields
}

Registry matches inputs to converters in registration order.

func NewRegistry ¶

func NewRegistry(converters ...models.Converter) *Registry

NewRegistry constructs a converter registry in priority order. When no converters are supplied, it registers the built-in stdlib converters.

func (*Registry) Convert ¶

func (r *Registry) Convert(ctx context.Context, input models.ConvertInput) (*models.ConvertResult, error)

Convert selects the first matching converter and delegates the conversion.

func (*Registry) Match ¶

func (r *Registry) Match(ext string, mimeType string) models.Converter

Match returns the first registered converter that accepts the extension or MIME type.

func (*Registry) Register ¶

func (r *Registry) Register(converter models.Converter)

Register appends a converter to the registry, preserving registration order.

type TextConverter ¶

type TextConverter struct{}

TextConverter passes plain text and Markdown files through as Markdown.

func (TextConverter) Accepts ¶

func (TextConverter) Accepts(ext string, mimeType string) bool

Accepts reports whether the input is plain text or Markdown.

func (TextConverter) Convert ¶

func (TextConverter) Convert(ctx context.Context, input models.ConvertInput) (*models.ConvertResult, error)

Convert returns the input text unchanged and extracts the first title-like line for downstream metadata.

type UnsupportedInputError ¶

type UnsupportedInputError struct {
	FilePath string
	URL      string
	Ext      string
	MIMEType string
}

UnsupportedInputError reports that no registered converter accepts the input.

func (*UnsupportedInputError) Error ¶

func (e *UnsupportedInputError) Error() string

Error returns a human-readable unsupported input message.

type XLSXConverter ¶

type XLSXConverter struct{}

XLSXConverter renders workbook sheets as Markdown tables.

func (XLSXConverter) Accepts ¶

func (XLSXConverter) Accepts(ext string, mimeType string) bool

Accepts reports whether the input is XLSX content.

func (XLSXConverter) Convert ¶

func (XLSXConverter) Convert(ctx context.Context, input models.ConvertInput) (*models.ConvertResult, error)

Convert transforms workbook sheets into Markdown tables while surfacing core document metadata when present.

type XMLConverter ¶

type XMLConverter struct{}

XMLConverter extracts text content from XML documents.

func (XMLConverter) Accepts ¶

func (XMLConverter) Accepts(ext string, mimeType string) bool

Accepts reports whether the input is XML.

func (XMLConverter) Convert ¶

func (XMLConverter) Convert(ctx context.Context, input models.ConvertInput) (*models.ConvertResult, error)

Convert strips tags and keeps the document's text content.

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL