Documentation
¶
Index ¶
- Constants
- func IsUnsupportedFormat(err error) bool
- type ConversionError
- type CsvConverter
- type DocumentConverter
- type DocumentConverterResult
- type DocxConverter
- type EpubConverter
- type FailedConversionAttempt
- type HTMLConverter
- type IpynbConverter
- type MarkItDown
- func (m *MarkItDown) Convert(source string) (*DocumentConverterResult, error)
- func (m *MarkItDown) ConvertFile(path string) (*DocumentConverterResult, error)
- func (m *MarkItDown) ConvertReader(r io.ReadSeeker, info StreamInfo) (*DocumentConverterResult, error)
- func (m *MarkItDown) ConvertURL(url string) (*DocumentConverterResult, error)
- func (m *MarkItDown) RegisterConverter(name string, c DocumentConverter, priority float64)
- type Option
- type PdfConverter
- type PlainTextConverter
- type PptxConverter
- type RSSConverter
- type StreamInfo
- type UnsupportedFormatError
- type XlsConverter
- type XlsxConverter
- type ZipConverter
Constants ¶
const (
	// PrioritySpecific is for format-specific converters (PDF, DOCX, etc.).
	PrioritySpecific = 0.0
	// PriorityGeneric is for fallback converters (PlainText, HTML, ZIP).
	PriorityGeneric = 10.0
)
Variables ¶
This section is empty.
Functions ¶
func IsUnsupportedFormat ¶
func IsUnsupportedFormat(err error) bool
IsUnsupportedFormat reports whether the error is an UnsupportedFormatError.
Types ¶
type ConversionError ¶
type ConversionError struct {
Attempts []FailedConversionAttempt
}
ConversionError is returned when a converter accepted the input but failed to convert it.
func (*ConversionError) Error ¶
func (e *ConversionError) Error() string
func (*ConversionError) Unwrap ¶
func (e *ConversionError) Unwrap() error
type CsvConverter ¶
type CsvConverter struct{}
CsvConverter handles CSV files.
func NewCsvConverter ¶
func NewCsvConverter() *CsvConverter
NewCsvConverter creates a new CsvConverter.
func (*CsvConverter) Accepts ¶
func (c *CsvConverter) Accepts(info StreamInfo) bool
func (*CsvConverter) Convert ¶
func (c *CsvConverter) Convert(reader io.ReadSeeker, info StreamInfo) (*DocumentConverterResult, error)
type DocumentConverter ¶
type DocumentConverter interface {
// Accepts reports whether this converter can handle the input described by info.
// It is given only the stream metadata, not the reader, so it must decide
// without consuming or repositioning the input stream.
Accepts(info StreamInfo) bool
// Convert performs the actual document-to-markdown conversion.
Convert(reader io.ReadSeeker, info StreamInfo) (*DocumentConverterResult, error)
}
DocumentConverter is the interface all format converters implement.
type DocumentConverterResult ¶
DocumentConverterResult holds the output of a conversion.
type DocxConverter ¶
type DocxConverter struct {
// contains filtered or unexported fields
}
DocxConverter handles DOCX files.
func NewDocxConverter ¶
func NewDocxConverter(m *MarkItDown) *DocxConverter
NewDocxConverter creates a new DocxConverter.
func (*DocxConverter) Accepts ¶
func (c *DocxConverter) Accepts(info StreamInfo) bool
func (*DocxConverter) Convert ¶
func (c *DocxConverter) Convert(reader io.ReadSeeker, info StreamInfo) (*DocumentConverterResult, error)
type EpubConverter ¶
type EpubConverter struct {
// contains filtered or unexported fields
}
EpubConverter handles EPUB files.
func NewEpubConverter ¶
func NewEpubConverter(m *MarkItDown) *EpubConverter
NewEpubConverter creates a new EpubConverter.
func (*EpubConverter) Accepts ¶
func (c *EpubConverter) Accepts(info StreamInfo) bool
func (*EpubConverter) Convert ¶
func (c *EpubConverter) Convert(reader io.ReadSeeker, info StreamInfo) (*DocumentConverterResult, error)
type FailedConversionAttempt ¶
FailedConversionAttempt records a converter that accepted but failed.
type HTMLConverter ¶
type HTMLConverter struct {
// contains filtered or unexported fields
}
HTMLConverter handles HTML files.
func NewHTMLConverter ¶
func NewHTMLConverter(m *MarkItDown) *HTMLConverter
NewHTMLConverter creates a new HTMLConverter.
func (*HTMLConverter) Accepts ¶
func (c *HTMLConverter) Accepts(info StreamInfo) bool
func (*HTMLConverter) Convert ¶
func (c *HTMLConverter) Convert(reader io.ReadSeeker, info StreamInfo) (*DocumentConverterResult, error)
func (*HTMLConverter) ConvertString ¶
func (c *HTMLConverter) ConvertString(htmlStr string) (*DocumentConverterResult, error)
ConvertString converts an HTML string to markdown.
type IpynbConverter ¶
type IpynbConverter struct{}
IpynbConverter handles Jupyter notebook files.
func NewIpynbConverter ¶
func NewIpynbConverter() *IpynbConverter
NewIpynbConverter creates a new IpynbConverter.
func (*IpynbConverter) Accepts ¶
func (c *IpynbConverter) Accepts(info StreamInfo) bool
func (*IpynbConverter) Convert ¶
func (c *IpynbConverter) Convert(reader io.ReadSeeker, info StreamInfo) (*DocumentConverterResult, error)
type MarkItDown ¶
type MarkItDown struct {
// contains filtered or unexported fields
}
MarkItDown is the main document-to-markdown conversion engine.
func New ¶
func New(opts ...Option) *MarkItDown
New creates a new MarkItDown instance with the given options.
func (*MarkItDown) Convert ¶
func (m *MarkItDown) Convert(source string) (*DocumentConverterResult, error)
Convert auto-detects the source type (file path or URL) and converts it.
func (*MarkItDown) ConvertFile ¶
func (m *MarkItDown) ConvertFile(path string) (*DocumentConverterResult, error)
ConvertFile converts a local file to markdown.
func (*MarkItDown) ConvertReader ¶
func (m *MarkItDown) ConvertReader(r io.ReadSeeker, info StreamInfo) (*DocumentConverterResult, error)
ConvertReader converts a stream to markdown using the provided StreamInfo.
func (*MarkItDown) ConvertURL ¶
func (m *MarkItDown) ConvertURL(url string) (*DocumentConverterResult, error)
ConvertURL fetches a URL and converts the response to markdown.
func (*MarkItDown) RegisterConverter ¶
func (m *MarkItDown) RegisterConverter(name string, c DocumentConverter, priority float64)
RegisterConverter adds a custom converter with the given priority. Lower priority values are tried first.
type Option ¶
type Option func(*MarkItDown)
Option configures a MarkItDown instance.
func WithKeepDataURIs ¶
WithKeepDataURIs configures whether full data URIs are kept in the output. The default is false, which truncates each data URI to the form "data:mime/type;base64...".
func WithStyleMap ¶
WithStyleMap sets custom style mapping for DOCX conversion.
type PdfConverter ¶
type PdfConverter struct{}
PdfConverter handles PDF files using the PDFium library via WebAssembly.
func NewPdfConverter ¶
func NewPdfConverter() *PdfConverter
NewPdfConverter creates a new PdfConverter.
func (*PdfConverter) Accepts ¶
func (c *PdfConverter) Accepts(info StreamInfo) bool
func (*PdfConverter) Convert ¶
func (c *PdfConverter) Convert(reader io.ReadSeeker, info StreamInfo) (*DocumentConverterResult, error)
type PlainTextConverter ¶
type PlainTextConverter struct{}
PlainTextConverter handles plain text, markdown, JSON, and JSONL files.
func NewPlainTextConverter ¶
func NewPlainTextConverter() *PlainTextConverter
NewPlainTextConverter creates a new PlainTextConverter.
func (*PlainTextConverter) Accepts ¶
func (c *PlainTextConverter) Accepts(info StreamInfo) bool
func (*PlainTextConverter) Convert ¶
func (c *PlainTextConverter) Convert(reader io.ReadSeeker, info StreamInfo) (*DocumentConverterResult, error)
type PptxConverter ¶
type PptxConverter struct {
// contains filtered or unexported fields
}
PptxConverter handles PPTX files.
func NewPptxConverter ¶
func NewPptxConverter(m *MarkItDown) *PptxConverter
NewPptxConverter creates a new PptxConverter.
func (*PptxConverter) Accepts ¶
func (c *PptxConverter) Accepts(info StreamInfo) bool
func (*PptxConverter) Convert ¶
func (c *PptxConverter) Convert(reader io.ReadSeeker, info StreamInfo) (*DocumentConverterResult, error)
type RSSConverter ¶
type RSSConverter struct{}
RSSConverter handles RSS and Atom feed files.
func NewRSSConverter ¶
func NewRSSConverter() *RSSConverter
NewRSSConverter creates a new RSSConverter.
func (*RSSConverter) Accepts ¶
func (c *RSSConverter) Accepts(info StreamInfo) bool
func (*RSSConverter) Convert ¶
func (c *RSSConverter) Convert(reader io.ReadSeeker, info StreamInfo) (*DocumentConverterResult, error)
type StreamInfo ¶
type StreamInfo struct {
MIMEType string
Extension string
Charset string
Filename string
LocalPath string
URL string
}
StreamInfo holds metadata about the input being converted.
type UnsupportedFormatError ¶
UnsupportedFormatError is returned when no converter can handle the input format.
func (*UnsupportedFormatError) Error ¶
func (e *UnsupportedFormatError) Error() string
type XlsConverter ¶
type XlsConverter struct{}
XlsConverter handles legacy XLS files.
func NewXlsConverter ¶
func NewXlsConverter() *XlsConverter
NewXlsConverter creates a new XlsConverter.
func (*XlsConverter) Accepts ¶
func (c *XlsConverter) Accepts(info StreamInfo) bool
func (*XlsConverter) Convert ¶
func (c *XlsConverter) Convert(reader io.ReadSeeker, info StreamInfo) (*DocumentConverterResult, error)
type XlsxConverter ¶
type XlsxConverter struct{}
XlsxConverter handles XLSX files.
func NewXlsxConverter ¶
func NewXlsxConverter() *XlsxConverter
NewXlsxConverter creates a new XlsxConverter.
func (*XlsxConverter) Accepts ¶
func (c *XlsxConverter) Accepts(info StreamInfo) bool
func (*XlsxConverter) Convert ¶
func (c *XlsxConverter) Convert(reader io.ReadSeeker, info StreamInfo) (*DocumentConverterResult, error)
type ZipConverter ¶
type ZipConverter struct {
// contains filtered or unexported fields
}
ZipConverter handles ZIP files by recursively converting their contents.
func NewZipConverter ¶
func NewZipConverter(m *MarkItDown) *ZipConverter
NewZipConverter creates a new ZipConverter.
func (*ZipConverter) Accepts ¶
func (c *ZipConverter) Accepts(info StreamInfo) bool
func (*ZipConverter) Convert ¶
func (c *ZipConverter) Convert(reader io.ReadSeeker, info StreamInfo) (*DocumentConverterResult, error)