webclean

package
v1.5.0 (Latest)
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 2, 2026 | License: MIT | Imports: 5 | Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
var ErrBadURL = errors.New("bad URL")

Functions

func ValidateURL

func ValidateURL(rawURL string) error

Types

type ContentExtractor

type ContentExtractor struct{}

func NewContentExtractor

func NewContentExtractor() *ContentExtractor

func (*ContentExtractor) ExtractScored

func (e *ContentExtractor) ExtractScored(text string) (string, float64)

type HTMLCleaner

type HTMLCleaner struct {
	// contains filtered or unexported fields
}

func NewHTMLCleaner

func NewHTMLCleaner() *HTMLCleaner

func (*HTMLCleaner) Clean

func (c *HTMLCleaner) Clean(html string) string

type JSONCleaner

type JSONCleaner struct{}

func NewJSONCleaner

func NewJSONCleaner() *JSONCleaner

func (*JSONCleaner) Clean

func (c *JSONCleaner) Clean(jsonStr string) string

type URLFetcher

type URLFetcher struct {
	// contains filtered or unexported fields
}

func NewURLFetcher

func NewURLFetcher(timeout time.Duration) *URLFetcher

func (*URLFetcher) Fetch

func (f *URLFetcher) Fetch(rawURL string) (string, error)

type WebCleanResult

type WebCleanResult struct {
	OriginalTokens int     `json:"original_tokens"`
	CleanTokens    int     `json:"clean_tokens"`
	ReductionPct   float64 `json:"reduction_pct"`
	ContentType    string  `json:"content_type"`
}

func CalculateReduction

func CalculateReduction(original, cleaned string) *WebCleanResult

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL