Documentation ¶
Index ¶
- Constants
- type CrawlJob
- func (cj *CrawlJob) AddJS(typ pb.PageReqType, url, js, metaStr string) error
- func (cj *CrawlJob) AddPage(url, metaStr string) error
- func (cj *CrawlJob) IsAlive() bool
- func (cj *CrawlJob) Run()
- func (cj *CrawlJob) SetCallbackXpathMatch(mdata KVMap)
- func (cj *CrawlJob) SetCallbackXpathRegexp(mdata KVMap)
- func (cj *CrawlJob) SetLogin(loginUrl string, loginPayload, loginParseXpath KVMap, loginSuccessCheck KVMap)
- func (cj *CrawlJob) SetLoginChrome(loginUrl string, loginJS string, loginSuccessCheck KVMap)
- func (cj *CrawlJob) Start()
- func (cj *CrawlJob) Stop()
- type KVMap
- type PageHTML
Constants ¶
View Source
const (
	PageReqType_BUILTINJS = pb.PageReqType_BUILTINJS
	PageReqType_JSCRIPT   = pb.PageReqType_JSCRIPT
)
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type CrawlJob ¶
type CrawlJob struct {
SeedURL string
MinDelay int32
MaxDelay int32
Follow bool
CallbackUrlRegexp string
FollowUrlRegexp string
CallbackXpathMatch []*pb.KVP
CallbackXpathRegexp []*pb.KVP
MaxConcurrentRequests int32
Useragent string
Impolite bool
Depth int32
Repeat bool
Frequency *google_protobuf1.Duration
Firstrun *google_protobuf.Timestamp
UnsafeNormalizeURL bool
Login bool
LoginUrl string
LoginJS string
LoginPayload []*pb.KVP
LoginParseFields bool
LoginParseXpath []*pb.KVP
LoginSuccessCheck *pb.KVP
CheckLoginAfterEachPage bool
Chrome bool
ChromeBinary string
DomLoadTime int32
NetworkIface string
CancelOnDisconnect bool
CheckContent bool
Prefetch bool
Callback func(*PageHTML, *CrawlJob)
UsePageChan bool
PageChan chan *pb.PageHTML
// contains filtered or unexported fields
}
func NewCrawlJob ¶
func (*CrawlJob) AddJS ¶
func (cj *CrawlJob) AddJS(typ pb.PageReqType, url, js, metaStr string) error
func (*CrawlJob) SetCallbackXpathMatch ¶
func (cj *CrawlJob) SetCallbackXpathMatch(mdata KVMap)
func (*CrawlJob) SetCallbackXpathRegexp ¶
func (cj *CrawlJob) SetCallbackXpathRegexp(mdata KVMap)
func (*CrawlJob) SetLoginChrome ¶
func (cj *CrawlJob) SetLoginChrome(loginUrl string, loginJS string, loginSuccessCheck KVMap)
Click to show internal directories.
Click to hide internal directories.