Documentation
¶
Index ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
Types ¶
type CLIArguments ¶
type CLIArguments struct {
cli.Helper
Seed []string `cli:"*seed" usage:"The start page (seed) of the crawl, example: https://google.com"`
MaxPages int `cli:"max" usage:"Max number of pages that will be scanned, for each domain" dft:"10"`
Delay int `cli:"delay" usage:"Milliseconds between 2 page visits, for each domain" dft:"400"`
RobotUserAgent string `cli:"robot" usage:"Name of the robot, for robots.txt" dft:"Googlebot"`
UserAgent string `` /* 126-byte string literal not displayed */
Debug bool `cli:"debug" usage:"Print all pages that are found"`
Query []string `cli:"query" usage:"Add custom query params to all requests"`
Headers []string `cli:"header" usage:"Add one or more HTTP request headers to all requests" dft:"X-hotcache:crawler"`
}
CLIArguments holds the command-line arguments for the scanner function.
type CustomCrawler ¶
type CustomCrawler struct {
gocrawl.DefaultExtender // Will use the default implementation of all but Fetch
}
CustomCrawler is a custom crawler extender for https://github.com/PuerkitoBio/gocrawl.
func (*CustomCrawler) Fetch ¶
func (x *CustomCrawler) Fetch(ctx *gocrawl.URLContext, userAgent string, headRequest bool) (*http.Response, error)
Fetch overrides the default implementation in order to add custom query parameters and headers to each request.
Click to show internal directories.
Click to hide internal directories.