Documentation ¶
Index ¶
Constants ¶
This section is empty.
Variables ¶
View Source
var ErrBadURL = errors.New("bad URL")
Functions ¶
func ValidateURL ¶
Types ¶
type ContentExtractor ¶
type ContentExtractor struct{}
func NewContentExtractor ¶
func NewContentExtractor() *ContentExtractor
func (*ContentExtractor) ExtractScored ¶
func (e *ContentExtractor) ExtractScored(text string) (string, float64)
type HTMLCleaner ¶
type HTMLCleaner struct {
// contains filtered or unexported fields
}
func NewHTMLCleaner ¶
func NewHTMLCleaner() *HTMLCleaner
func (*HTMLCleaner) Clean ¶
func (c *HTMLCleaner) Clean(html string) string
type JSONCleaner ¶
type JSONCleaner struct{}
func NewJSONCleaner ¶
func NewJSONCleaner() *JSONCleaner
func (*JSONCleaner) Clean ¶
func (c *JSONCleaner) Clean(jsonStr string) string
type URLFetcher ¶
type URLFetcher struct {
// contains filtered or unexported fields
}
func NewURLFetcher ¶
func NewURLFetcher(timeout time.Duration) *URLFetcher
type WebCleanResult ¶
type WebCleanResult struct {
OriginalTokens int `json:"original_tokens"`
CleanTokens int `json:"clean_tokens"`
ReductionPct float64 `json:"reduction_pct"`
ContentType string `json:"content_type"`
}
func CalculateReduction ¶
func CalculateReduction(original, cleaned string) *WebCleanResult
Click to show internal directories.
Click to hide internal directories.