Documentation
¶
Index ¶
- Variables
- func CssOrXpath(cssSelector CssSelector) string
- func DelaySleep(conf config.RabiConfig, tag string)
- func ExecEventCondition(ctx context.Context, conf config.RabiConfig, event EventSelector, ...) (bool, error)
- type Condition
- type CssSelector
- type Event
- type EventSelector
- type ExecSelector
- type HttpCookies
- type Job
- type Rabida
- type RabidaImpl
- func (r RabidaImpl) Crawl(ctx context.Context, job Job, ...) error
- func (r RabidaImpl) CrawlWithConfig(ctx context.Context, job Job, ...) error
- func (r RabidaImpl) CrawlWithListeners(ctx context.Context, job Job, ...) error
- func (r RabidaImpl) DownloadFile(ctx context.Context, job Job, callback func(file string), ...) error
- func (r RabidaImpl) Html(ctx context.Context, father *cdp.Node, conf config.RabiConfig) *html.Node
- type SetAttribute
Constants ¶
This section is empty.
Variables ¶
View Source
// ErrNotFound is a package-level error value of the unexported type errNotFound.
var ErrNotFound error = errNotFound{}
Functions ¶
func CssOrXpath ¶
func CssOrXpath(cssSelector CssSelector) string
func DelaySleep ¶
func DelaySleep(conf config.RabiConfig, tag string)
func ExecEventCondition ¶
func ExecEventCondition(ctx context.Context, conf config.RabiConfig, event EventSelector, queryActions []chromedp.QueryOption) (bool, error)
Types ¶
type Condition ¶
// Condition groups an expected Value, an optional CheckFunc and the
// ExecSelector associated with the check.
// NOTE(review): how the three fields are combined is decided by code that is
// not part of this declaration (see ExecEventCondition) — confirm there.
type Condition struct {
	// Value is carried under the JSON key "value"; presumably it is the value
	// argument handed to CheckFunc — TODO confirm.
	Value string `json:"value"`
	// CheckFunc receives (text, value) and reports a bool. encoding/json
	// cannot encode function values, so the field is tagged "-" to keep it out
	// of JSON instead of making json.Marshal fail.
	CheckFunc func(text, value string) bool `json:"-"`
	// ExecSelector is carried under the JSON key "execSelector".
	ExecSelector ExecSelector `json:"execSelector"`
}
type CssSelector ¶
// CssSelector describes how to locate element(s) and which value to retrieve from them,
// using either a CSS selector or an XPath expression.
type CssSelector struct {
// Css is the CSS selector string (JSON key "css").
Css string `json:"css"`
// Attr defaults to innerText.
Attr string `json:"attr"`
// Scope supplies a scope for each selector.
// In jQuery this would look something like: $(scope).find(selector)
Scope string `json:"scope"`
// Attrs maps each attribute to a CSS selector. Recursive population stops when Attrs is nil.
Attrs map[string]CssSelector `json:"attrs"`
// Iframe, when true, looks for the element(s) within the first iframe in the page.
// If IframeSelector is set, that iframe is used instead.
Iframe bool `json:"iframe"`
// IframeSelector specifies the iframe to use when the page has multiple iframe elements.
IframeSelector *CssSelector `json:"iframeSelector"`
// XpathScope is presumably the XPath counterpart of Scope — TODO confirm.
// Note: choose only one of XPath and CSS selectors.
XpathScope string `json:"xpathScope"`
// Xpath is an XPath expression.
// e.g. //*[@id="zz"]/div[2]/ul/li[1]/text()
// e.g. //div[@id="indexCarousel"]//div[@class="item"]//img/@src
Xpath string `json:"xpath"`
// SetAttrs lists SetAttribute entries (JSON key "setAttrs").
// NOTE(review): when they are applied is not visible here — confirm.
SetAttrs []SetAttribute `json:"setAttrs"`
// Before lists the events to perform before the value is retrieved.
Before []EventSelector `json:"before"`
// Condition is an optional (nil-able) Condition attached to this selector.
// NOTE(review): how it is evaluated is not visible here — confirm against the caller.
Condition *Condition `json:"condition"`
}
type EventSelector ¶
// EventSelector groups an Event type with a Condition and a CssSelector.
// It is the element type of CssSelector.Before and Job.PrePaginate, and the event argument of ExecEventCondition.
type EventSelector struct {
// Type is the Event kind (JSON key "type").
Type Event `json:"type"`
// Condition is the Condition carried with the event.
// NOTE(review): presumably it gates whether the event runs — confirm against ExecEventCondition.
Condition Condition `json:"condition"`
// Selector is the CssSelector carried with the event (JSON key "selector").
Selector CssSelector `json:"selector"`
}
type ExecSelector ¶
// ExecSelector groups an Event type with a CssSelector; it is the type of Condition.ExecSelector.
type ExecSelector struct {
// Type is the Event kind (JSON key "type").
Type Event `json:"type"`
// Selector is the CssSelector carried with the event type (JSON key "selector").
Selector CssSelector `json:"selector"`
}
type HttpCookies ¶
type Job ¶
// Job describes a single crawl: the page to visit, the root selector and how
// to paginate.
type Job struct {
	// Link is the URL you want to crawl.
	Link string `json:"link"`
	// CssSelector is the root CSS selector.
	CssSelector CssSelector `json:"cssSelector"`
	// PrePaginate lists the events to perform before paginating.
	PrePaginate []EventSelector `json:"prePaginate"`
	// Paginator is the CSS selector for the next page.
	Paginator CssSelector `json:"paginator"`
	// PaginatorFunc maps the current page number to a CssSelector; presumably
	// an alternative to Paginator — TODO confirm. encoding/json cannot encode
	// function values, so the field is tagged "-" to keep it out of JSON.
	PaginatorFunc func(currentPageNo int) CssSelector `json:"-"`
	// Limit limits how many pages should be crawled.
	Limit int `json:"limit"`
	// StartPageBtn is a CssSelector carried under the JSON key "startPageBtn".
	// NOTE(review): presumably the button that opens the first page — confirm.
	StartPageBtn CssSelector `json:"startPageBtn"`
	// StartPageUrl is carried under the JSON key "startPageUrl".
	// NOTE(review): presumably the URL of the first page — confirm.
	StartPageUrl string `json:"startPageUrl"`
	// EnableCookies holds the HttpCookies settings (JSON key "enableCookies").
	EnableCookies HttpCookies `json:"enableCookies"`
}
type Rabida ¶
// Rabida is the crawler interface returned by NewRabida.
type Rabida interface {
// Crawl processes job; callback receives ret, nextPageUrl and currentPageNo.
// NOTE(review): the meaning of callback's bool result and when the before/after actions run
// are not visible here — confirm against the implementation.
Crawl(ctx context.Context, job Job,
callback func(ret []interface{}, nextPageUrl string, currentPageNo int) bool,
before []chromedp.Action,
after []chromedp.Action,
) error
// CrawlWithConfig takes the same arguments as Crawl plus an explicit config.RabiConfig
// and variadic chromedp.ExecAllocatorOption values.
CrawlWithConfig(ctx context.Context, job Job,
callback func(ret []interface{}, nextPageUrl string, currentPageNo int) bool,
before []chromedp.Action,
after []chromedp.Action,
conf config.RabiConfig,
options ...chromedp.ExecAllocatorOption,
) error
// CrawlWithListeners differs from CrawlWithConfig in that callback also receives a context.Context,
// the config is passed by pointer, options is a slice, and variadic listeners accept events as interface{}.
// NOTE(review): which events reach the listeners is not visible here — confirm.
CrawlWithListeners(ctx context.Context, job Job,
callback func(ctx context.Context, ret []interface{}, nextPageUrl string, currentPageNo int) bool,
before []chromedp.Action,
after []chromedp.Action,
confPtr *config.RabiConfig,
options []chromedp.ExecAllocatorOption,
listeners ...func(ev interface{}),
) error
// DownloadFile processes job and hands a file string to callback.
// NOTE(review): presumably file is the path of a downloaded file — confirm against the implementation.
DownloadFile(ctx context.Context, job Job,
callback func(file string),
confPtr *config.RabiConfig,
options ...chromedp.ExecAllocatorOption,
) error
}
func NewRabida ¶
func NewRabida(conf *config.RabiConfig) Rabida
type RabidaImpl ¶
// RabidaImpl is the concrete type that carries the CrawlWithListeners and DownloadFile methods.
// NOTE(review): presumably the Rabida implementation returned by NewRabida — confirm.
type RabidaImpl struct {
// contains filtered or unexported fields
}
func (RabidaImpl) CrawlWithConfig ¶
func (RabidaImpl) CrawlWithListeners ¶
func (r RabidaImpl) CrawlWithListeners(ctx context.Context, job Job, callback func(ctx context.Context, ret []interface{}, nextPageUrl string, currentPageNo int) bool, before []chromedp.Action, after []chromedp.Action, confPtr *config.RabiConfig, options []chromedp.ExecAllocatorOption, listeners ...func(ev interface{})) error
func (RabidaImpl) DownloadFile ¶
func (r RabidaImpl) DownloadFile(ctx context.Context, job Job, callback func(file string), confPtr *config.RabiConfig, options ...chromedp.ExecAllocatorOption) error
type SetAttribute ¶
Click to show internal directories.
Click to hide internal directories.