types

package
v1.3.0 Latest
Published: Dec 1, 2025 License: MIT Imports: 20 Imported by: 22

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type CrawlerOptions

type CrawlerOptions struct {
	// OutputWriter is the interface for writing output
	OutputWriter output.Writer
	// RateLimit controls the rate of outgoing requests
	RateLimit *ratelimit.Limiter
	// Parser is a mechanism for extracting new URLs from responses
	Parser *parser.Parser
	// Options contains the user specified configuration options
	Options *Options
	// ExtensionsValidator is a validator for file extensions
	ExtensionsValidator *extensions.Validator
	// UniqueFilter is a filter for deduplicating crawled items
	UniqueFilter filters.Filter
	// ScopeManager is a manager for validating crawling scope
	ScopeManager *scope.Manager
	// Dialer is the global dialer instance used by the crawler
	Dialer *fastdialer.Dialer
	// Wappalyzer is the instance used for technology detection
	Wappalyzer *wappalyzer.Wappalyze
}

CrawlerOptions contains helper utilities for the crawler

func NewCrawlerOptions

func NewCrawlerOptions(options *Options) (*CrawlerOptions, error)

NewCrawlerOptions creates a new crawler options structure from user specified options.
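
A minimal end-to-end sketch adapted from the katana README: build an Options value, derive CrawlerOptions from it, and hand them to the standard engine. The specific option values and the target URL are illustrative.

package main

import (
	"math"

	"github.com/projectdiscovery/gologger"
	"github.com/projectdiscovery/katana/pkg/engine/standard"
	"github.com/projectdiscovery/katana/pkg/types"
)

func main() {
	options := &types.Options{
		MaxDepth:     3,             // maximum crawl depth
		FieldScope:   "rdn",         // default DNS scope field
		BodyReadSize: math.MaxInt,   // read response bodies in full
		Timeout:      10,            // request timeout in seconds
		Concurrency:  10,            // crawling goroutines
		Parallelism:  10,            // URL-processing goroutines
		RateLimit:    150,           // max requests per second
		Strategy:     "depth-first", // crawling strategy
	}
	crawlerOptions, err := types.NewCrawlerOptions(options)
	if err != nil {
		gologger.Fatal().Msg(err.Error())
	}
	defer crawlerOptions.Close() // the Close method documented below

	crawler, err := standard.New(crawlerOptions)
	if err != nil {
		gologger.Fatal().Msg(err.Error())
	}
	defer crawler.Close()

	if err := crawler.Crawl("https://example.com"); err != nil {
		gologger.Warning().Msgf("could not crawl: %s", err.Error())
	}
}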

func (*CrawlerOptions) Close

func (c *CrawlerOptions) Close() error

Close closes the crawler options resources

func (*CrawlerOptions) ValidatePath added in v1.0.0

func (c *CrawlerOptions) ValidatePath(path string) bool

func (*CrawlerOptions) ValidateScope added in v1.0.0

func (c *CrawlerOptions) ValidateScope(absURL, rootHostname string) (bool, error)

ValidateScope validates whether an absolute URL is within the crawl scope for the given root hostname
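
A hedged sketch of how the two checks might compose in custom engine code: ValidatePath presumably consults the extensions validator and ValidateScope the scope manager; the helper name and arguments are hypothetical.

package main

import "github.com/projectdiscovery/katana/pkg/types"

// shouldCrawl is a hypothetical helper composing the two validators.
func shouldCrawl(c *types.CrawlerOptions, absURL, rootHostname, path string) bool {
	// reject paths filtered out by extension rules
	if !c.ValidatePath(path) {
		return false
	}
	// reject URLs outside the configured crawl scope
	inScope, err := c.ValidateScope(absURL, rootHostname)
	return err == nil && inScope
}

func main() {}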

type OnResultCallback added in v0.0.3

type OnResultCallback func(output.Result)

OnResultCallback is called for each crawl result (output.Result); see the combined sketch after OnSkipURLCallback below.

type OnSkipURLCallback added in v1.2.0

type OnSkipURLCallback func(string)

OnSkipURLCallback is called with each URL (string) that the crawler skips.
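
Both callback types are plain functions, so they can be assigned inline when building Options. A minimal sketch; the OnResult usage mirrors the katana README, while the logging choices are illustrative.

package main

import (
	"github.com/projectdiscovery/gologger"
	"github.com/projectdiscovery/katana/pkg/output"
	"github.com/projectdiscovery/katana/pkg/types"
)

func main() {
	options := &types.Options{
		// invoked for every crawl result
		OnResult: func(result output.Result) {
			gologger.Info().Msg(result.Request.URL)
		},
		// invoked for every URL the crawler skips
		OnSkipURL: func(url string) {
			gologger.Debug().Msgf("skipped %s", url)
		},
	}
	_ = options // pass to types.NewCrawlerOptions as usual
}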

type Options

type Options struct {
	// URLs contains a list of URLs for crawling
	URLs goflags.StringSlice
	// Resume the scan from the state stored in the resume config file
	Resume string
	// Exclude contains filters for excluding hosts ('cdn', 'private-ips', CIDR, IP, regex)
	Exclude goflags.StringSlice
	// Scope contains a list of regexes for in-scope URLs
	Scope goflags.StringSlice
	// OutOfScope contains a list of regexes for out-of-scope URLs
	OutOfScope goflags.StringSlice
	// NoScope disables host based default scope
	NoScope bool
	// DisplayOutScope displays out of scope items in results
	DisplayOutScope bool
	// ExtensionsMatch contains extensions to match explicitly
	ExtensionsMatch goflags.StringSlice
	// ExtensionFilter contains additional items for filter list
	ExtensionFilter goflags.StringSlice
	// NoDefaultExtFilter removes the default extensions from the filter list
	NoDefaultExtFilter bool
	// OutputMatchCondition is the condition to match output
	OutputMatchCondition string
	// OutputFilterCondition is the condition to filter output
	OutputFilterCondition string
	// MaxDepth is the maximum depth to crawl
	MaxDepth int
	// BodyReadSize is the maximum size of response body to read
	BodyReadSize int
	// Timeout is the time to wait for request in seconds
	Timeout int
	// TimeStable is the time to wait until the page is stable
	TimeStable int
	// CrawlDuration is the maximum duration for which to crawl the target
	CrawlDuration time.Duration
	// Delay is the delay between crawl requests in seconds
	Delay int
	// RateLimit is the maximum number of requests to send per second
	RateLimit int
	// Retries is the number of retries for each request
	Retries int
	// RateLimitMinute is the maximum number of requests to send per minute
	RateLimitMinute int
	// Concurrency is the number of concurrent crawling goroutines
	Concurrency int
	// Parallelism is the number of URL-processing goroutines
	Parallelism int
	// FormConfig is the path to the form configuration file
	FormConfig string
	// Proxy is the URL for the proxy server
	Proxy string
	// Strategy is the crawling strategy: depth-first or breadth-first
	Strategy string
	// FieldScope is the scope field for default DNS scope
	FieldScope string
	// OutputFile is the file to write output to
	OutputFile string
	// KnownFiles enables crawling of known files like robots.txt, sitemap.xml, etc.
	KnownFiles string
	// Fields is the fields to format in output
	Fields string
	// StoreFields is the fields to store in separate per-host files
	StoreFields string
	// FieldConfig is the path to the custom field configuration file
	FieldConfig string
	// NoColors disables coloring of response output
	NoColors bool
	// JSON enables writing output in JSON format
	JSON bool
	// ExcludeOutputFields is the list of fields to exclude from the output
	ExcludeOutputFields goflags.StringSlice
	// ListOutputFields enables listing of the available output fields
	ListOutputFields bool
	// Silent shows only results in the output
	Silent bool
	// Verbose specifies showing verbose output
	Verbose bool
	// TechDetect enables technology detection
	TechDetect bool
	// Version enables showing of crawler version
	Version bool
	// ScrapeJSResponses enables scraping of relative endpoints from javascript
	ScrapeJSResponses bool
	// ScrapeJSLuiceResponses enables scraping of endpoints from javascript using jsluice
	ScrapeJSLuiceResponses bool
	// CustomHeaders is a list of custom headers to add to request
	CustomHeaders goflags.StringSlice
	// Headless enables headless scraping
	Headless bool
	// AutomaticFormFill enables optional automatic form filling and submission
	AutomaticFormFill bool
	// FormExtraction enables extraction of form, input, textarea & select elements
	FormExtraction bool
	// UseInstalledChrome skips chrome install and use local instance
	UseInstalledChrome bool
	// ShowBrowser specifies whether to show the browser in headless mode
	ShowBrowser bool
	// HeadlessOptionalArguments specifies optional arguments to pass to Chrome
	HeadlessOptionalArguments goflags.StringSlice
	// HeadlessNoSandbox specifies if chrome should be started in --no-sandbox mode
	HeadlessNoSandbox bool
	// SystemChromePath specifies the chrome binary path for headless crawling
	SystemChromePath string
	// ChromeWSUrl specifies the Chrome debugger websocket URL of a running Chrome instance to attach to
	ChromeWSUrl string
	// OnResult allows callback function on a result
	OnResult OnResultCallback
	// OnSkipURL allows callback function on a skipped url
	OnSkipURL OnSkipURLCallback
	// StoreResponse specifies if katana should store http requests/responses
	StoreResponse bool
	// StoreResponseDir specifies a custom directory for storing http requests/responses
	StoreResponseDir string
	// NoClobber specifies if katana should avoid overwriting existing output files
	NoClobber bool
	// StoreFieldDir specifies a custom directory for storing fields
	StoreFieldDir string
	// OmitRaw omits raw requests/responses from the output
	OmitRaw bool
	// OmitBody omits the response body from the output
	OmitBody bool
	// ChromeDataDir specifies the --user-data-dir passed to the chrome binary to preserve sessions
	ChromeDataDir string
	// HeadlessNoIncognito specifies if chrome should be started without incognito mode
	HeadlessNoIncognito bool
	// XhrExtraction enables extraction of XHR requests
	XhrExtraction bool
	// HealthCheck determines if a self-healthcheck should be performed
	HealthCheck bool
	// PprofServer enables pprof server
	PprofServer bool
	// ErrorLogFile specifies a file to which the errors of all requests are written
	ErrorLogFile string
	// Resolvers contains custom resolvers
	Resolvers goflags.StringSlice
	// OutputTemplate enables custom output template
	OutputTemplate string
	// OutputMatchRegex is the regex to match output url
	OutputMatchRegex goflags.StringSlice
	// OutputFilterRegex is the regex to filter output url
	OutputFilterRegex goflags.StringSlice
	// FilterRegex is the slice of compiled regexes used to filter URLs
	FilterRegex []*regexp.Regexp
	// MatchRegex is the slice of compiled regexes used to match URLs
	MatchRegex []*regexp.Regexp
	// DisableUpdateCheck disables the automatic update check
	DisableUpdateCheck bool
	// IgnoreQueryParams ignores crawling the same path with different query-param values
	IgnoreQueryParams bool
	// Debug enables debug mode
	Debug bool
	// TlsImpersonate enables experimental tls ClientHello randomization for standard crawler
	TlsImpersonate bool
	// DisableRedirects disables the following of redirects
	DisableRedirects bool
	// PathClimb enables path expansion (auto crawl discovered paths)
	PathClimb bool
	// DisableUniqueFilter disables duplicate content filtering
	DisableUniqueFilter bool
	// MaxOnclickLinks is the maximum number of onclick links to process per page (default: 10)
	MaxOnclickLinks int
}

var DefaultOptions Options

func (*Options) ConfigureOutput added in v1.1.1

func (options *Options) ConfigureOutput()

ConfigureOutput configures the output logging levels to be displayed on the screen
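
A minimal sketch of the expected call order; the flag choices here are illustrative.

package main

import "github.com/projectdiscovery/katana/pkg/types"

func main() {
	options := &types.Options{
		Verbose:  true, // show verbose output
		NoColors: true, // disable colored logging
	}
	// apply the logging-related flags before any crawling starts
	options.ConfigureOutput()
}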

func (*Options) ParseCustomHeaders

func (options *Options) ParseCustomHeaders() map[string]string
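
There is no doc comment here; judging by the field, it presumably turns the "name: value" strings in CustomHeaders into a map, mirroring katana's -H/-headers CLI flag. A hedged sketch:

package main

import (
	"fmt"

	"github.com/projectdiscovery/goflags"
	"github.com/projectdiscovery/katana/pkg/types"
)

func main() {
	options := &types.Options{
		// "name: value" pairs; the exact split on ":" is an assumption
		CustomHeaders: goflags.StringSlice{"User-Agent: katana", "Cookie: session=demo"},
	}
	for name, value := range options.ParseCustomHeaders() {
		fmt.Printf("%s: %s\n", name, value)
	}
}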

func (*Options) ParseHeadlessOptionalArguments added in v0.0.2

func (options *Options) ParseHeadlessOptionalArguments() map[string]string
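
Similarly undocumented; it presumably splits the "flag=value" entries accepted by the -headless-options CLI flag into a map. A hedged sketch (the key/value split is an assumption):

package main

import (
	"fmt"

	"github.com/projectdiscovery/goflags"
	"github.com/projectdiscovery/katana/pkg/types"
)

func main() {
	options := &types.Options{
		// extra arguments passed to headless chrome
		HeadlessOptionalArguments: goflags.StringSlice{"proxy-server=http://127.0.0.1:8080"},
	}
	for flag, value := range options.ParseHeadlessOptionalArguments() {
		fmt.Printf("%s=%s\n", flag, value)
	}
}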

func (*Options) ShouldResume added in v1.0.4

func (options *Options) ShouldResume() bool
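
Given the Resume field, this presumably reports whether a previous scan state should be loaded. A sketch with an illustrative filename:

package main

import (
	"github.com/projectdiscovery/gologger"
	"github.com/projectdiscovery/katana/pkg/types"
)

func main() {
	options := &types.Options{Resume: "resume.cfg"} // illustrative path
	if options.ShouldResume() {
		// presumably true only when the resume config file actually exists
		gologger.Info().Msg("resuming previous scan state")
	}
}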
