processor

package
v1.1.0 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 17, 2026 | License: MIT | Imports: 5 | Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type ContentProcessor

// ContentProcessor is the receiver type for the CleanNewlines, Process,
// ProcessFromReader, ToMarkdown, and ToText methods. No fields are shown in
// this listing; NewContentProcessor returns a pointer to one.
type ContentProcessor struct {
}

func NewContentProcessor

func NewContentProcessor() *ContentProcessor

func (*ContentProcessor) CleanNewlines

func (cp *ContentProcessor) CleanNewlines(text string) string

CleanNewlines removes unwanted newlines that break up sentences.

func (*ContentProcessor) Process

func (cp *ContentProcessor) Process(html, url string, opts ProcessOptions) (*ProcessedContent, error)

func (*ContentProcessor) ProcessFromReader

func (cp *ContentProcessor) ProcessFromReader(r io.Reader, url string, opts ProcessOptions) (*ProcessedContent, error)

func (*ContentProcessor) ToMarkdown

func (cp *ContentProcessor) ToMarkdown(content *ProcessedContent, includeMetadata bool, preserveLinks bool) string

func (*ContentProcessor) ToText

func (cp *ContentProcessor) ToText(content *ProcessedContent, lineWidth int) string

type Link

// Link holds a Text and URL pair; ProcessedContent.Links is a slice of these.
//
// NOTE(review): presumably Text is the anchor text and URL the target of a
// hyperlink found in the processed document — confirm against the extractor.
type Link struct {
	Text string
	URL  string
}

type ProcessOptions

// ProcessOptions is the options value accepted by ContentProcessor.Process
// and ContentProcessor.ProcessFromReader.
//
// NOTE(review): field semantics are not shown in this listing. The notes
// below are inferred from the field names only and must be confirmed against
// the implementation:
//   - RemoveAds: presumably toggles removal of advertising elements.
//   - CleanHTML: presumably toggles HTML clean-up before extraction.
//   - MinContentLength: presumably a lower bound on accepted content length;
//     the unit (bytes or runes) is unknown.
//   - IncludeMetadata: presumably toggles population of metadata in the result.
//   - MetadataFields: presumably names the metadata fields to collect.
type ProcessOptions struct {
	RemoveAds        bool
	CleanHTML        bool
	MinContentLength int
	IncludeMetadata  bool
	MetadataFields   []string
}

type ProcessedContent

// ProcessedContent is the result type returned (by pointer) from
// ContentProcessor.Process and ContentProcessor.ProcessFromReader, and the
// input accepted by ContentProcessor.ToMarkdown and ContentProcessor.ToText.
//
// NOTE(review): field semantics are not shown in this listing. The notes
// below are inferred from the field names only and must be confirmed against
// the implementation:
//   - Content vs. TextContent: presumably the extracted markup and its
//     plain-text form, respectively.
//   - Author vs. Byline: the distinction between these two is not visible here.
//   - Length: presumably the size of the extracted content; the unit (bytes
//     or runes) is unknown.
//   - Metadata: string key/value pairs; which keys appear is not visible here.
//   - Images: presumably image URLs found in the document.
//   - Links: Link values (Text/URL pairs) found in the document.
type ProcessedContent struct {
	Title       string
	Content     string
	TextContent string
	Author      string
	Excerpt     string
	Byline      string
	Length      int
	Metadata    map[string]string
	Images      []string
	Links       []Link
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL