Documentation
¶
Overview ¶
Package spider uses a combination of tooling, including Katana, to analyze HTTP server URLs and feed them into the other subpackages of sleuth for further analysis.
Index ¶
- func MapOptionsToTypesOptions(options *Options) *types.Options
- type LinkDetails
- type OnResultCallback
- type Option
- func WithAutomaticFormFill(automaticFormFill bool) Option
- func WithBodyReadSize(bodyReadSize int) Option
- func WithChromeDataDir(chromeDataDir string) Option
- func WithChromeWSUrl(chromeWSUrl string) Option
- func WithConcurrency(concurrency int) Option
- func WithCrawlDuration(crawlDuration time.Duration) Option
- func WithCustomHeaders(customHeaders []string) Option
- func WithDebug(debug bool) Option
- func WithDelay(delay int) Option
- func WithDisableRedirects(disableRedirects bool) Option
- func WithDisableUpdateCheck(disableUpdateCheck bool) Option
- func WithDisplayOutScope(displayOutScope bool) Option
- func WithErrorLogFile(errorLogFile string) Option
- func WithExclude(exclude []string) Option
- func WithExtensionFilter(extensionFilter []string) Option
- func WithExtensionsMatch(extensionsMatch []string) Option
- func WithFieldConfig(fieldConfig string) Option
- func WithFieldScope(fieldScope string) Option
- func WithFields(fields string) Option
- func WithFilterRegex(filterRegex []*regexp.Regexp) Option
- func WithFormConfig(formConfig string) Option
- func WithFormExtraction(formExtraction bool) Option
- func WithHeadless(headless bool) Option
- func WithHeadlessNoIncognito(headlessNoIncognito bool) Option
- func WithHeadlessNoSandbox(headlessNoSandbox bool) Option
- func WithHeadlessOptionalArguments(headlessOptionalArguments []string) Option
- func WithHealthCheck(healthCheck bool) Option
- func WithIgnoreQueryParams(ignoreQueryParams bool) Option
- func WithJSON(json bool) Option
- func WithKnownFiles(knownFiles string) Option
- func WithMatchRegex(matchRegex []*regexp.Regexp) Option
- func WithMaxDepth(maxDepth int) Option
- func WithNoClobber(noClobber bool) Option
- func WithNoColors(noColors bool) Option
- func WithNoScope(noScope bool) Option
- func WithOmitBody(omitBody bool) Option
- func WithOmitRaw(omitRaw bool) Option
- func WithOnResult(onResult OnResultCallback) Option
- func WithOutOfScope(outOfScope []string) Option
- func WithOutputFile(outputFile string) Option
- func WithOutputFilterCondition(outputFilterCondition string) Option
- func WithOutputFilterRegex(outputFilterRegex []string) Option
- func WithOutputMatchCondition(outputMatchCondition string) Option
- func WithOutputMatchRegex(outputMatchRegex []string) Option
- func WithParallelism(parallelism int) Option
- func WithPprofServer(pprofServer bool) Option
- func WithProxy(proxy string) Option
- func WithRateLimit(rateLimit int) Option
- func WithRateLimitMinute(rateLimitMinute int) Option
- func WithResolvers(resolvers []string) Option
- func WithResume(resume string) Option
- func WithRetries(retries int) Option
- func WithScope(scope []string) Option
- func WithScrapeJSLuiceResponses(scrapeJSLuiceResponses bool) Option
- func WithScrapeJSResponses(scrapeJSResponses bool) Option
- func WithShowBrowser(showBrowser bool) Option
- func WithSilent(silent bool) Option
- func WithStoreFieldDir(storeFieldDir string) Option
- func WithStoreFields(storeFields string) Option
- func WithStoreResponse(storeResponse bool) Option
- func WithStoreResponseDir(storeResponseDir string) Option
- func WithStrategy(strategy string) Option
- func WithSystemChromePath(systemChromePath string) Option
- func WithTLSImpersonate(tlsImpersonate bool) Option
- func WithTechDetect(techDetect bool) Option
- func WithTimeout(timeout int) Option
- func WithURLs(urls []string) Option
- func WithUseInstalledChrome(useInstalledChrome bool) Option
- func WithVerbose(verbose bool) Option
- func WithVersion(version bool) Option
- func WithXhrExtraction(xhrExtraction bool) Option
- type Options
- type Spider
- type WebSpiderReport
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func MapOptionsToTypesOptions ¶
MapOptionsToTypesOptions maps Options parameters to types.Options
Types ¶
type LinkDetails ¶
// LinkDetails provides the details of a single link found during a web
// spider operation. Every field is serialized under its lowercase name in
// both JSON and YAML.
type LinkDetails struct {
// Link is the link that was found.
Link string `json:"link" yaml:"link"`
// Status is a numeric status for the link.
// NOTE(review): presumably the HTTP response status code — confirm.
Status int `json:"status" yaml:"status"`
// Technologies lists technology names associated with the link.
// NOTE(review): presumably populated by technology detection — confirm.
Technologies []string `json:"technologies" yaml:"technologies"`
}
LinkDetails provides the details of a single link found during a web spider operation
type OnResultCallback ¶
OnResultCallback is a callback function that is called when a result is found
type Option ¶
// Option is a functional option for the katana crawler: a function that
// receives a pointer to the Options value it applies to.
type Option func(*Options)
Option is a functional option for the katana crawler
func WithAutomaticFormFill ¶
WithAutomaticFormFill enables optional automatic form filling and submission
func WithBodyReadSize ¶
WithBodyReadSize sets the maximum size of response body to read
func WithChromeDataDir ¶
WithChromeDataDir specifies the --user-data-dir passed to the chrome binary to preserve sessions
func WithChromeWSUrl ¶
WithChromeWSUrl specifies the Chrome debugger websocket url for a running Chrome instance to attach to
func WithConcurrency ¶
WithConcurrency sets the number of concurrent crawling goroutines
func WithCrawlDuration ¶
WithCrawlDuration sets the crawl duration
func WithCustomHeaders ¶
WithCustomHeaders sets the custom headers to add to request
func WithDisableRedirects ¶
WithDisableRedirects disables the following of redirects
func WithDisableUpdateCheck ¶
WithDisableUpdateCheck disables automatic update check
func WithDisplayOutScope ¶
WithDisplayOutScope sets the display out of scope flag
func WithErrorLogFile ¶
WithErrorLogFile specifies a file to write with the errors of all requests
func WithExclude ¶
WithExclude sets the exclude filter to use
func WithExtensionFilter ¶
WithExtensionFilter sets the extension filter
func WithExtensionsMatch ¶
WithExtensionsMatch sets the extensions to match
func WithFieldConfig ¶
WithFieldConfig sets the custom field configuration file
func WithFieldScope ¶
WithFieldScope sets the field scope for default DNS scope
func WithFields ¶
WithFields sets the fields to format in output
func WithFilterRegex ¶
WithFilterRegex sets the slice of regexes used to filter URLs
func WithFormConfig ¶
WithFormConfig sets the form configuration file
func WithFormExtraction ¶
WithFormExtraction enables extraction of form, input, textarea & select elements
func WithHeadlessNoIncognito ¶
WithHeadlessNoIncognito specifies if chrome should be started without incognito mode
func WithHeadlessNoSandbox ¶
WithHeadlessNoSandbox specifies if chrome should be started in --no-sandbox mode
func WithHeadlessOptionalArguments ¶
WithHeadlessOptionalArguments specifies optional arguments to pass to Chrome
func WithHealthCheck ¶
WithHealthCheck determines if a self-healthcheck should be performed
func WithIgnoreQueryParams ¶
WithIgnoreQueryParams skips crawling the same path again with different query-param values
func WithKnownFiles ¶
WithKnownFiles sets the known files to crawl
func WithMatchRegex ¶
WithMatchRegex sets the slice of regexes used to match URLs
func WithMaxDepth ¶
WithMaxDepth sets the maximum depth to crawl
func WithNoClobber ¶
WithNoClobber specifies if katana should overwrite existing output files
func WithNoColors ¶
WithNoColors disables coloring of response output
func WithOmitBody ¶
WithOmitBody omits the response body from the output
func WithOmitRaw ¶
WithOmitRaw omits raw requests/responses from the output
func WithOnResult ¶
func WithOnResult(onResult OnResultCallback) Option
WithOnResult allows callback function on a result
func WithOutOfScope ¶
WithOutOfScope sets the out of scope regexes to use
func WithOutputFile ¶
WithOutputFile sets the output file
func WithOutputFilterCondition ¶
WithOutputFilterCondition sets the output filter condition
func WithOutputFilterRegex ¶
WithOutputFilterRegex sets the regex to filter output url
func WithOutputMatchCondition ¶
WithOutputMatchCondition sets the output match condition
func WithOutputMatchRegex ¶
WithOutputMatchRegex sets the regex to match output url
func WithParallelism ¶
WithParallelism sets the number of urls processing goroutines
func WithPprofServer ¶
WithPprofServer enables pprof server
func WithRateLimit ¶
WithRateLimit sets the rate limit for requests
func WithRateLimitMinute ¶
WithRateLimitMinute sets the rate limit for requests per minute
func WithResolvers ¶
WithResolvers sets the custom resolvers
func WithRetries ¶
WithRetries sets the number of retries for requests
func WithScrapeJSLuiceResponses ¶
WithScrapeJSLuiceResponses enables scraping of endpoints from javascript using jsluice
func WithScrapeJSResponses ¶
WithScrapeJSResponses enables scraping of relative endpoints from javascript
func WithShowBrowser ¶
WithShowBrowser specifies whether to show the browser in headless mode
func WithStoreFieldDir ¶
WithStoreFieldDir specifies if katana should use a custom directory to store fields
func WithStoreFields ¶
WithStoreFields sets the fields to store in separate per-host files
func WithStoreResponse ¶
WithStoreResponse specifies if katana should store http requests/responses
func WithStoreResponseDir ¶
WithStoreResponseDir specifies if katana should use a custom directory to store http requests/responses
func WithStrategy ¶
WithStrategy sets the crawling strategy
func WithSystemChromePath ¶
WithSystemChromePath specifies the chrome binary path for headless crawling
func WithTLSImpersonate ¶
WithTLSImpersonate enables experimental tls ClientHello randomization for standard crawler
func WithTechDetect ¶
WithTechDetect enables technology detection
func WithUseInstalledChrome ¶
WithUseInstalledChrome skips the chrome install and uses the local instance
func WithVerbose ¶
WithVerbose specifies showing verbose output
func WithVersion ¶
WithVersion enables showing of crawler version
func WithXhrExtraction ¶
WithXhrExtraction enables extraction of xhr requests
type Options ¶
// Options are the functional parameters for katana. Each field corresponds
// to one crawler setting.
type Options struct {
// URLs contains a list of URLs for crawling
URLs goflags.StringSlice
// Resume the scan from the state stored in the resume config file
Resume string
// Exclude host matching specified filter ('cdn', 'private-ips', cidr, ip, regex)
Exclude goflags.StringSlice
// Scope contains a list of regexes for in-scope URLs
Scope goflags.StringSlice
// OutOfScope contains a list of regexes for out-of-scope URLs
OutOfScope goflags.StringSlice
// NoScope disables host based default scope
NoScope bool
// DisplayOutScope displays out of scope items in results
DisplayOutScope bool
// ExtensionsMatch contains extensions to match explicitly
ExtensionsMatch goflags.StringSlice
// ExtensionFilter contains additional items for filter list
ExtensionFilter goflags.StringSlice
// OutputMatchCondition is the condition to match output
OutputMatchCondition string
// OutputFilterCondition is the condition to filter output
OutputFilterCondition string
// MaxDepth is the maximum depth to crawl
MaxDepth int
// BodyReadSize is the maximum size of response body to read
BodyReadSize int
// Timeout is the time to wait for request in seconds
Timeout int
// CrawlDuration is the duration to crawl the target for. It is a
// time.Duration, so the unit is carried by the value itself.
CrawlDuration time.Duration
// Delay is the delay between each crawl requests in seconds
Delay int
// RateLimit is the maximum number of requests to send per second
RateLimit int
// Retries is the number of retries to do for request
Retries int
// RateLimitMinute is the maximum number of requests to send per minute
RateLimitMinute int
// Concurrency is the number of concurrent crawling goroutines
Concurrency int
// Parallelism is the number of urls processing goroutines
Parallelism int
// FormConfig is the path to the form configuration file
FormConfig string
// Proxy is the URL for the proxy server
Proxy string
// Strategy is the crawling strategy. depth-first or breadth-first
Strategy string
// FieldScope is the scope field for default DNS scope
FieldScope string
// OutputFile is the file to write output to
OutputFile string
// KnownFiles enables crawling of known files like robots.txt, sitemap.xml, etc
KnownFiles string
// Fields is the fields to format in output
Fields string
// StoreFields is the fields to store in separate per-host files
StoreFields string
// FieldConfig is the path to the custom field configuration file
FieldConfig string
// NoColors disables coloring of response output
NoColors bool
// JSON enables writing output in JSON format
JSON bool
// Silent shows only output
Silent bool
// Verbose specifies showing verbose output
Verbose bool
// TechDetect enables technology detection
TechDetect bool
// Version enables showing of crawler version
Version bool
// ScrapeJSResponses enables scraping of relative endpoints from javascript
ScrapeJSResponses bool
// ScrapeJSLuiceResponses enables scraping of endpoints from javascript using jsluice
ScrapeJSLuiceResponses bool
// CustomHeaders is a list of custom headers to add to request
CustomHeaders goflags.StringSlice
// Headless enables headless scraping
Headless bool
// AutomaticFormFill enables optional automatic form filling and submission
AutomaticFormFill bool
// FormExtraction enables extraction of form, input, textarea & select elements
FormExtraction bool
// UseInstalledChrome skips the chrome install and uses the local instance
UseInstalledChrome bool
// ShowBrowser specifies whether to show the browser in headless mode
ShowBrowser bool
// HeadlessOptionalArguments specifies optional arguments to pass to Chrome
HeadlessOptionalArguments goflags.StringSlice
// HeadlessNoSandbox specifies if chrome should be started in --no-sandbox mode
HeadlessNoSandbox bool
// SystemChromePath specifies the chrome binary path for headless crawling
SystemChromePath string
// ChromeWSUrl specifies the Chrome debugger websocket url for a running Chrome instance to attach to
ChromeWSUrl string
// OnResult allows callback function on a result
OnResult OnResultCallback
// StoreResponse specifies if katana should store http requests/responses
StoreResponse bool
// StoreResponseDir specifies if katana should use a custom directory to store http requests/responses
StoreResponseDir string
// NoClobber specifies if katana should overwrite existing output files
// NOTE(review): the name suggests that true prevents overwriting — confirm polarity.
NoClobber bool
// StoreFieldDir specifies if katana should use a custom directory to store fields
StoreFieldDir string
// OmitRaw omits raw requests/responses from the output
OmitRaw bool
// OmitBody omits the response body from the output
OmitBody bool
// ChromeDataDir specifies the --user-data-dir passed to the chrome binary to preserve sessions
ChromeDataDir string
// HeadlessNoIncognito specifies if chrome should be started without incognito mode
HeadlessNoIncognito bool
// XhrExtraction enables extraction of xhr requests
XhrExtraction bool
// HealthCheck determines if a self-healthcheck should be performed
HealthCheck bool
// PprofServer enables pprof server
PprofServer bool
// ErrorLogFile specifies a file to write with the errors of all requests
ErrorLogFile string
// Resolvers contains custom resolvers
Resolvers goflags.StringSlice
// OutputMatchRegex is the regex to match output url
OutputMatchRegex goflags.StringSlice
// OutputFilterRegex is the regex to filter output url
OutputFilterRegex goflags.StringSlice
// FilterRegex is the slice of compiled regexes used to filter urls
FilterRegex []*regexp.Regexp
// MatchRegex is the slice of compiled regexes used to match urls
MatchRegex []*regexp.Regexp
// DisableUpdateCheck disables automatic update check
DisableUpdateCheck bool
// IgnoreQueryParams ignores crawling same path with different query-param values
IgnoreQueryParams bool
// Debug is a debug toggle.
// NOTE(review): presumably enables debug output — its effect is not documented here; confirm.
Debug bool
// TLSImpersonate enables experimental tls ClientHello randomization for standard crawler
TLSImpersonate bool
// DisableRedirects disables the following of redirects
DisableRedirects bool
}
Options are the functional parameters for katana
func NewOptions ¶
NewOptions creates a new Options struct with default values and allows overrides
type WebSpiderReport ¶
// WebSpiderReport is a holistic report of all the links found during a web
// spider operation, including non-fatal errors that occurred along the way.
// Every field is serialized under its lowercase name in both JSON and YAML.
type WebSpiderReport struct {
// Targets lists the targets of the spider operation.
// NOTE(review): presumably the targets passed to PerformWebSpider — confirm.
Targets []string `json:"targets" yaml:"targets"`
// Links holds the details of each link that was found.
Links []LinkDetails `json:"links" yaml:"links"`
// Errors holds the non-fatal errors that occurred during the operation.
Errors []string `json:"errors" yaml:"errors"`
}
A WebSpiderReport represents a holistic report of all the links that were found during a web spider operation, including non-fatal errors that occurred during the operation
func PerformWebSpider ¶
func PerformWebSpider(targets []string) WebSpiderReport
PerformWebSpider performs a web spider operation against the provided targets, returning a WebSpiderReport with the results of the spider