Documentation
¶
Index ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type CrawlerOptions ¶
type CrawlerOptions struct {
// OutputWriter is the interface for writing output
OutputWriter output.Writer
// RateLimit is a mechanism for controlling request rate limit
RateLimit *ratelimit.Limiter
// Parser is a mechanism for extracting new URLs from responses
Parser *parser.Parser
// Options contains the user specified configuration options
Options *Options
// ExtensionsValidator is a validator for file extensions
ExtensionsValidator *extensions.Validator
// UniqueFilter is a filter for deduplication of unique items
UniqueFilter filters.Filter
// ScopeManager is a manager for validating crawling scope
ScopeManager *scope.Manager
// Dialer is an instance of the dialer for the global crawler
Dialer *fastdialer.Dialer
// Wappalyzer instance for technologies detection
Wappalyzer *wappalyzer.Wappalyze
}
CrawlerOptions contains helper utilities for the crawler
func NewCrawlerOptions ¶
func NewCrawlerOptions(options *Options) (*CrawlerOptions, error)
NewCrawlerOptions creates a new crawler options structure from user specified options.
func (*CrawlerOptions) Close ¶
func (c *CrawlerOptions) Close() error
Close closes the crawler options resources
func (*CrawlerOptions) ValidatePath ¶ added in v1.0.0
func (c *CrawlerOptions) ValidatePath(path string) bool
func (*CrawlerOptions) ValidateScope ¶ added in v1.0.0
func (c *CrawlerOptions) ValidateScope(absURL, rootHostname string) (bool, error)
ValidateScope validates scope for an AbsURL
type OnResultCallback ¶ added in v0.0.3
type OnResultCallback func(output.Result)
OnResultCallback (output.Result)
type OnSkipURLCallback ¶ added in v1.2.0
type OnSkipURLCallback func(string)
OnSkipURLCallback (string)
type Options ¶
type Options struct {
// URLs contains a list of URLs for crawling
URLs goflags.StringSlice
// Resume the scan from the state stored in the resume config file
Resume string
// Exclude host matching specified filter ('cdn', 'private-ips', cidr, ip, regex)
Exclude goflags.StringSlice
// Scope contains a list of regexes for in-scope URLs
Scope goflags.StringSlice
// OutOfScope contains a list of regexes for out-of-scope URLs
OutOfScope goflags.StringSlice
// NoScope disables host based default scope
NoScope bool
// DisplayOutScope displays out of scope items in results
DisplayOutScope bool
// ExtensionsMatch contains extensions to match explicitly
ExtensionsMatch goflags.StringSlice
// ExtensionFilter contains additional items for filter list
ExtensionFilter goflags.StringSlice
// NoDefaultExtFilter removes the default extensions from the filter list
NoDefaultExtFilter bool
// OutputMatchCondition is the condition to match output
OutputMatchCondition string
// OutputFilterCondition is the condition to filter output
OutputFilterCondition string
// MaxDepth is the maximum depth to crawl
MaxDepth int
// BodyReadSize is the maximum size of response body to read
BodyReadSize int
// Timeout is the time to wait for request in seconds
Timeout int
// TimeStable is the time to wait until the page is stable
TimeStable int
// CrawlDuration is the maximum duration to crawl a target for
CrawlDuration time.Duration
// Delay is the delay between each crawl request in seconds
Delay int
// RateLimit is the maximum number of requests to send per second
RateLimit int
// Retries is the number of retries to do for request
Retries int
// RateLimitMinute is the maximum number of requests to send per minute
RateLimitMinute int
// Concurrency is the number of concurrent crawling goroutines
Concurrency int
// Parallelism is the number of urls processing goroutines
Parallelism int
// FormConfig is the path to the form configuration file
FormConfig string
// Proxy is the URL for the proxy server
Proxy string
// Strategy is the crawling strategy. depth-first or breadth-first
Strategy string
// FieldScope is the scope field for default DNS scope
FieldScope string
// OutputFile is the file to write output to
OutputFile string
// KnownFiles enables crawling of known files like robots.txt, sitemap.xml, etc.
KnownFiles string
// Fields is the fields to format in output
Fields string
// StoreFields is the fields to store in separate per-host files
StoreFields string
// FieldConfig is the path to the custom field configuration file
FieldConfig string
// NoColors disables coloring of response output
NoColors bool
// JSON enables writing output in JSON format
JSON bool
// ExcludeOutputFields is the list of fields to exclude from the output
ExcludeOutputFields goflags.StringSlice
// ListOutputFields enables listing the available output fields
ListOutputFields bool
// Silent shows only output
Silent bool
// Verbose specifies showing verbose output
Verbose bool
// TechDetect enables technology detection
TechDetect bool
// Version enables showing of crawler version
Version bool
// ScrapeJSResponses enables scraping of relative endpoints from javascript
ScrapeJSResponses bool
// ScrapeJSLuiceResponses enables scraping of endpoints from javascript using jsluice
ScrapeJSLuiceResponses bool
// CustomHeaders is a list of custom headers to add to request
CustomHeaders goflags.StringSlice
// Headless enables headless scraping
Headless bool
// AutomaticFormFill enables optional automatic form filling and submission
AutomaticFormFill bool
// FormExtraction enables extraction of form, input, textarea & select elements
FormExtraction bool
// UseInstalledChrome skips chrome install and use local instance
UseInstalledChrome bool
// ShowBrowser specifies whether to show the browser in headless mode
ShowBrowser bool
// HeadlessOptionalArguments specifies optional arguments to pass to Chrome
HeadlessOptionalArguments goflags.StringSlice
// HeadlessNoSandbox specifies if chrome should be started in --no-sandbox mode
HeadlessNoSandbox bool
// SystemChromePath : Specify the chrome binary path for headless crawling
SystemChromePath string
// ChromeWSUrl : Specify the Chrome debugger websocket url for a running Chrome instance to attach to
ChromeWSUrl string
// OnResult allows callback function on a result
OnResult OnResultCallback
// OnSkipURL allows callback function on a skipped url
OnSkipURL OnSkipURLCallback
// StoreResponse specifies if katana should store http requests/responses
StoreResponse bool
// StoreResponseDir specifies if katana should use a custom directory to store http requests/responses
StoreResponseDir string
// NoClobber specifies if katana should avoid overwriting existing output files
NoClobber bool
// StoreFieldDir specifies if katana should use a custom directory to store fields
StoreFieldDir string
// OmitRaw omits raw requests/responses from the output
OmitRaw bool
// OmitBody omits the response body from the output
OmitBody bool
// ChromeDataDir : Specify the --user-data-dir to chrome binary to preserve sessions
ChromeDataDir string
// HeadlessNoIncognito specifies if chrome should be started without incognito mode
HeadlessNoIncognito bool
// XhrExtraction extract xhr requests
XhrExtraction bool
// HealthCheck determines if a self-healthcheck should be performed
HealthCheck bool
// PprofServer enables pprof server
PprofServer bool
// ErrorLogFile specifies a file to write with the errors of all requests
ErrorLogFile string
// Resolvers contains custom resolvers
Resolvers goflags.StringSlice
// OutputTemplate enables custom output template
OutputTemplate string
// OutputMatchRegex is the regex to match output url
OutputMatchRegex goflags.StringSlice
// OutputFilterRegex is the regex to filter output url
OutputFilterRegex goflags.StringSlice
// FilterRegex is the slice of compiled regexes used to filter URLs
FilterRegex []*regexp.Regexp
// MatchRegex is the slice of compiled regexes used to match URLs
MatchRegex []*regexp.Regexp
// DisableUpdateCheck disables the automatic update check
DisableUpdateCheck bool
// IgnoreQueryParams ignores crawling the same path with different query-param values
IgnoreQueryParams bool
// Debug
Debug bool
// TlsImpersonate enables experimental tls ClientHello randomization for standard crawler
TlsImpersonate bool
// DisableRedirects disables the following of redirects
DisableRedirects bool
// PathClimb enables path expansion (auto crawl discovered paths)
PathClimb bool
// DisableUniqueFilter disables duplicate content filtering
DisableUniqueFilter bool
// MaxOnclickLinks is the maximum number of onclick links to process per page (default: 10)
MaxOnclickLinks int
}
var DefaultOptions Options
func (*Options) ConfigureOutput ¶ added in v1.1.1
func (options *Options) ConfigureOutput()
ConfigureOutput configures the output logging levels to be displayed on the screen
func (*Options) ParseCustomHeaders ¶
func (*Options) ParseHeadlessOptionalArguments ¶ added in v0.0.2
func (*Options) ShouldResume ¶ added in v1.0.4
Click to show internal directories.
Click to hide internal directories.