Documentation
¶
Index ¶
- Variables
- func CssOrXpath(cssSelector CssSelector) string
- func DelaySleep(conf config.RabiConfig, tag string)
- func ExecEventCondition(ctx context.Context, conf config.RabiConfig, event EventSelector, ...) (bool, error)
- type Condition
- type CssSelector
- type Event
- type EventSelector
- type ExecSelector
- type HttpCookies
- type Job
- type Rabida
- type RabidaImpl
- func (r RabidaImpl) Crawl(ctx context.Context, job Job, ...) error
- func (r RabidaImpl) CrawlTraversal(ctx context.Context, conf *config.RabiConfig) error
- func (r RabidaImpl) CrawlWithConfig(ctx context.Context, job Job, ...) error
- func (r RabidaImpl) CrawlWithListeners(ctx context.Context, job Job, ...) error
- func (r RabidaImpl) DownloadFile(ctx context.Context, job Job, callback func(file string), ...) error
- func (r RabidaImpl) Html(ctx context.Context, father *cdp.Node, conf config.RabiConfig) *html.Node
- type SetAttribute
Constants ¶
This section is empty.
Variables ¶
View Source
var ErrNotFound error = errNotFound{}
Functions ¶
func CssOrXpath ¶
func CssOrXpath(cssSelector CssSelector) string
func DelaySleep ¶
func DelaySleep(conf config.RabiConfig, tag string)
func ExecEventCondition ¶
func ExecEventCondition(ctx context.Context, conf config.RabiConfig, event EventSelector, queryActions []chromedp.QueryOption) (bool, error)
Types ¶
type Condition ¶
type Condition struct {
Value string
CheckFunc func(text, value string) bool
ExecSelector ExecSelector
}
type CssSelector ¶
type CssSelector struct {
Css string
// Attr default is innerText
Attr string
// Scope supplies a scope to each selector
// In jQuery, this would look something like this: $(scope).find(selector)
Scope string
// Attrs maps each attribute to a css selector. When Attrs is nil, recursive population stops
Attrs map[string]CssSelector
// Iframe if true, we will look for the element(s) within the first iframe in the page
Iframe bool
// XpathScope Note: use either the xpath or the css selector, not both
XpathScope string
// Xpath xpath expression
// eg: //*[@id="zz"]/div[2]/ul/li[1]/text()
// eg: //div[@id="indexCarousel"]//div[@class="item"]//img/@src
Xpath string
SetAttrs []SetAttribute
// Before events to execute before retrieving the value
Before []EventSelector
}
type EventSelector ¶
type EventSelector struct {
Type Event
Condition Condition
Selector CssSelector
}
type ExecSelector ¶
type ExecSelector struct {
Type Event
Selector CssSelector
}
type HttpCookies ¶
type Job ¶
type Job struct {
// Link the url you want to crawl
Link string
// CssSelector root css selector
CssSelector CssSelector
// PrePaginate do something before paginate
PrePaginate []EventSelector
// Paginator css selector for next page
Paginator CssSelector
// Limit limits how many pages should be crawled
Limit int
StartPageBtn CssSelector
StartPageUrl string
EnableCookies HttpCookies
}
type Rabida ¶
type Rabida interface {
Crawl(ctx context.Context, job Job,
callback func(ret []interface{}, nextPageUrl string, currentPageNo int) bool,
before []chromedp.Action,
after []chromedp.Action,
) error
CrawlWithConfig(ctx context.Context, job Job,
callback func(ret []interface{}, nextPageUrl string, currentPageNo int) bool,
before []chromedp.Action,
after []chromedp.Action,
conf config.RabiConfig,
options ...chromedp.ExecAllocatorOption,
) error
CrawlWithListeners(ctx context.Context, job Job,
callback func(ctx context.Context, ret []interface{}, nextPageUrl string, currentPageNo int) bool,
before []chromedp.Action,
after []chromedp.Action,
confPtr *config.RabiConfig,
options []chromedp.ExecAllocatorOption,
listeners ...func(ev interface{}),
) error
DownloadFile(ctx context.Context, job Job,
callback func(file string),
confPtr *config.RabiConfig,
options ...chromedp.ExecAllocatorOption,
) error
CrawlTraversal(ctx context.Context, conf *config.RabiConfig) error
}
func NewRabida ¶
func NewRabida(conf *config.RabiConfig) Rabida
type RabidaImpl ¶
type RabidaImpl struct {
// contains filtered or unexported fields
}
func (RabidaImpl) CrawlTraversal ¶ added in v0.2.4
func (r RabidaImpl) CrawlTraversal(ctx context.Context, conf *config.RabiConfig) error
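func (RabidaImpl) CrawlWithConfig ¶
func (r RabidaImpl) CrawlWithConfig(ctx context.Context, job Job, callback func(ret []interface{}, nextPageUrl string, currentPageNo int) bool, before []chromedp.Action, after []chromedp.Action, conf config.RabiConfig, options ...chromedp.ExecAllocatorOption) error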
func (RabidaImpl) CrawlWithListeners ¶
func (r RabidaImpl) CrawlWithListeners(ctx context.Context, job Job, callback func(ctx context.Context, ret []interface{}, nextPageUrl string, currentPageNo int) bool, before []chromedp.Action, after []chromedp.Action, confPtr *config.RabiConfig, options []chromedp.ExecAllocatorOption, listeners ...func(ev interface{})) error
func (RabidaImpl) DownloadFile ¶
func (r RabidaImpl) DownloadFile(ctx context.Context, job Job, callback func(file string), confPtr *config.RabiConfig, options ...chromedp.ExecAllocatorOption) error
type SetAttribute ¶
Click to show internal directories.
Click to hide internal directories.