Documentation
¶
Index ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type Article ¶
// Article is a collection of properties extracted from the HTML body.
//
// Every field except TopNode and Doc is serialized by encoding/json under the
// key named in its struct tag. All of those tags carry omitempty, so a field
// holding its zero value (empty string, nil pointer, empty slice or map, 0)
// is left out of the encoded output entirely.
type Article struct {
Title string `json:"title,omitempty"`
TitleUnmodified string `json:"titleunmodified,omitempty"`
// JSON key is "content", not the field name.
CleanedText string `json:"content,omitempty"`
// The Meta* fields drop the "Meta" prefix in their JSON keys.
// NOTE(review): presumably populated from the page's <meta> tags — confirm.
MetaDescription string `json:"description,omitempty"`
MetaLang string `json:"lang,omitempty"`
MetaFavicon string `json:"favicon,omitempty"`
MetaKeywords string `json:"keywords,omitempty"`
// JSON key is "canonicalurl".
CanonicalLink string `json:"canonicalurl,omitempty"`
Domain string `json:"domain,omitempty"`
// TopNode is tagged "-" and is therefore never serialized.
TopNode *goquery.Selection `json:"-"`
// JSON key is "image".
TopImage string `json:"image,omitempty"`
// Tags and Movies are pointers to set.Set; a nil pointer is omitted.
Tags *set.Set `json:"tags,omitempty"`
Movies *set.Set `json:"movies,omitempty"`
// JSON key is "url".
FinalURL string `json:"url,omitempty"`
LinkHash string `json:"linkhash,omitempty"`
RawHTML string `json:"rawhtml,omitempty"`
// Doc is tagged "-" and is therefore never serialized.
Doc *goquery.Document `json:"-"`
Links []string `json:"links,omitempty"`
// PublishDate is a pointer, so "no date" (nil) is distinguishable from a
// zero time.Time and is omitted from the JSON output.
PublishDate *time.Time `json:"publishdate,omitempty"`
AdditionalData map[string]string `json:"additionaldata,omitempty"`
// Because of omitempty, a Delta of 0 does not appear in the JSON output.
// NOTE(review): unit and meaning of Delta are not shown here — confirm.
Delta int64 `json:"delta,omitempty"`
}
Article is a collection of properties extracted from the HTML body.
type Configuration ¶
// Configuration is a wrapper for various config options.
//
// Only the exported fields are listed; the type also has unexported fields
// that this listing hides.
// NOTE(review): because of those hidden fields, a hand-built Configuration
// literal may be incomplete; GetDefaultConfiguration is presumably the
// intended way to obtain one — confirm.
type Configuration struct {
TargetLanguage string
// NOTE(review): presumably the User-Agent sent when fetching pages — confirm.
BrowserUserAgent string
Debug bool
// Boolean switches. NOTE(review): by name these look like they gate the
// publish-date, additional-data, image and meta-language handling — confirm
// against the extractor code.
ExtractPublishDate bool
AdditionalDataExtractor bool
EnableImageFetching bool
UseMetaLanguage bool
// StopWords and Parser use types from package goose, which is not shown here.
StopWords goose.StopWords
Parser *goose.Parser
// Timeout is a time.Duration. NOTE(review): which operation it bounds is not
// shown here — confirm.
Timeout time.Duration
// contains filtered or unexported fields
}
Configuration is a wrapper for various configuration options.
func GetDefaultConfiguration ¶
func GetDefaultConfiguration(args ...string) Configuration
GetDefaultConfiguration returns a Configuration populated with safe default options.
Click to show internal directories.
Click to hide internal directories.