parser

package
v1.0.0 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 1, 2026 License: MIT Imports: 6 Imported by: 0

Documentation

Overview

Package parser provides HTML and JavaScript parsing, API specification parsing, and form analysis for the crawler.

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func ExtractURLsFromText

func ExtractURLsFromText(text string) []string

ExtractURLsFromText extracts URLs from plain text.

Types

type APIEndpoint

// APIEndpoint is the element type of JSParseResult.APIEndpoints.
//
// NOTE(review): SourceLine and Context presumably locate the endpoint within
// the analyzed source text; confirm against the JavaScript parser.
type APIEndpoint struct {
	URL        string
	Method     string
	Parameters []string
	SourceLine int
	Context    string
}

APIEndpoint represents an API endpoint discovered during JavaScript analysis; JSParseResult.APIEndpoints holds values of this type.

type APIInfo

// APIInfo is the result type of APIParser.ParseFromResponse,
// APIParser.ParseGraphQLSchema and APIParser.ParseOpenAPISpec.
//
// Endpoints holds Endpoint values (not APIEndpoint, which is used by
// JSParseResult). Type is an APIType; see the APIType* constants.
type APIInfo struct {
	Endpoints []Endpoint
	BaseURL   string
	Version   string
	Type      APIType // REST, GraphQL, SOAP, etc.
}

APIInfo represents discovered API information.

type APIParser

// APIParser has no fields; NewAPIParser returns a *APIParser and its methods
// produce *APIInfo values.
type APIParser struct{}

APIParser extracts API information from various sources.

func NewAPIParser

func NewAPIParser() *APIParser

NewAPIParser creates a new API parser.

func (*APIParser) ParseFromResponse

func (p *APIParser) ParseFromResponse(url, contentType string, body []byte) *APIInfo

ParseFromResponse extracts API information from a response.

func (*APIParser) ParseGraphQLSchema

func (p *APIParser) ParseGraphQLSchema(schema string) *APIInfo

ParseGraphQLSchema parses a GraphQL schema.

func (*APIParser) ParseOpenAPISpec

func (p *APIParser) ParseOpenAPISpec(spec []byte) (*APIInfo, error)

ParseOpenAPISpec parses an OpenAPI/Swagger specification.

type APIType

// APIType is a string-based type; it is the type of APIInfo.Type.
type APIType string

APIType represents the type of API.

// Defined APIType values. Each constant's string value is the lowercase name
// of the API style it stands for.
const (
	APITypeREST    APIType = "rest"
	APITypeGraphQL APIType = "graphql"
	APITypeSOAP    APIType = "soap"
	APITypeRPC     APIType = "rpc"
	APITypeUnknown APIType = "unknown"
)

type AnalyzeResult

// AnalyzeResult is the result type of FormAnalyzer.Analyze.
//
// Form has type Form, whereas Analyze takes a FormInfo as input.
//
// NOTE(review): the Is* flags cover login, signup, search, contact, payment
// and upload, but FormType also defines comment, settings and generic values
// that have no matching flag; how the flags relate to FormType is not visible
// here and should be confirmed against the analyzer.
type AnalyzeResult struct {
	Form       Form
	FormType   FormType
	HasCSRF    bool
	CSRFField  string
	HasCaptcha bool
	IsLogin    bool
	IsSignup   bool
	IsSearch   bool
	IsContact  bool
	IsPayment  bool
	IsUpload   bool
	Complexity int // 1-10 complexity score
}

AnalyzeResult contains detailed form analysis results.

type ButtonInfo

// ButtonInfo is the element type of FormInfo.Buttons.
//
// NOTE(review): FormAction presumably mirrors a button-level formaction
// attribute; confirm against the HTML parser.
type ButtonInfo struct {
	Name       string
	Type       string
	Value      string
	Text       string
	FormAction string
}

ButtonInfo represents a form button.

type Endpoint

// Endpoint is the element type of APIInfo.Endpoints. Its Parameters are
// structured Parameter values, unlike APIEndpoint.Parameters, which is a
// []string.
//
// NOTE(review): the difference between Source and DiscoveredFrom, and the unit
// of ResponseSize (presumably bytes), are not visible here; confirm with the
// code that populates them.
type Endpoint struct {
	URL            string
	Method         string
	Source         string
	Depth          int
	Parameters     []Parameter
	Headers        map[string]string
	DiscoveredFrom string
	StatusCode     int
	ContentType    string
	ResponseSize   int64
	Timestamp      time.Time
}

Endpoint represents a discovered API endpoint as reported in APIInfo.Endpoints, including request parameters and response metadata.

type Form

// Form is the type of AnalyzeResult.Form; its Inputs are FormInput values.
// It is distinct from FormInfo, which is the element type of ParseResult.Forms
// and the input to FormAnalyzer.
//
// NOTE(review): Depth is presumably the crawl depth at which the form was
// found; confirm with the crawler.
type Form struct {
	URL       string
	Action    string
	Method    string
	Enctype   string
	Inputs    []FormInput
	HasCSRF   bool
	Depth     int
	Timestamp time.Time
}

Form represents an HTML form discovered during crawling.

type FormAnalyzer

// FormAnalyzer has no fields; NewFormAnalyzer returns a *FormAnalyzer whose
// methods take a FormInfo.
type FormAnalyzer struct{}

FormAnalyzer provides comprehensive form analysis.

func NewFormAnalyzer

func NewFormAnalyzer() *FormAnalyzer

NewFormAnalyzer creates a new form analyzer.

func (*FormAnalyzer) Analyze

func (a *FormAnalyzer) Analyze(form FormInfo, pageURL string) *AnalyzeResult

Analyze performs comprehensive analysis of a form.

func (*FormAnalyzer) GeneratePayload

func (a *FormAnalyzer) GeneratePayload(form FormInfo) map[string]string

GeneratePayload generates test payloads for a form.

type FormInfo

// FormInfo is the element type of ParseResult.Forms and the input type of
// FormAnalyzer.Analyze and FormAnalyzer.GeneratePayload. Inputs holds
// InputInfo values and Buttons holds ButtonInfo values.
type FormInfo struct {
	Action  string
	Method  string
	Enctype string
	ID      string
	Name    string
	Class   string
	Inputs  []InputInfo
	Buttons []ButtonInfo
}

FormInfo represents a parsed form.

type FormInput

// FormInput is the element type of Form.Inputs. Every field here also exists,
// with the same name and type, on InputInfo, which carries additional fields.
type FormInput struct {
	Name        string
	Type        string
	Value       string
	Required    bool
	Placeholder string
	Pattern     string
	MaxLength   int
	MinLength   int
}

FormInput represents an input field in a form.

type FormType

// FormType is a string-based type; it is the type of AnalyzeResult.FormType.
type FormType string

FormType represents the type of form.

// Defined FormType values. Each constant's string value is the lowercase form
// of its name suffix.
//
// NOTE(review): FormTypeGeneric is presumably the fallback when no more
// specific type matches; confirm against the analyzer.
const (
	FormTypeLogin    FormType = "login"
	FormTypeSignup   FormType = "signup"
	FormTypeSearch   FormType = "search"
	FormTypeContact  FormType = "contact"
	FormTypePayment  FormType = "payment"
	FormTypeUpload   FormType = "upload"
	FormTypeComment  FormType = "comment"
	FormTypeSettings FormType = "settings"
	FormTypeGeneric  FormType = "generic"
)

type FunctionInfo

// FunctionInfo is the element type of JSParseResult.Functions.
//
// NOTE(review): Line is presumably the line of the function in the analyzed
// JavaScript; whether it is 0- or 1-based is not visible here.
type FunctionInfo struct {
	Name       string
	Parameters []string
	Line       int
}

FunctionInfo represents a function signature.

type HTMLParser

// HTMLParser exposes no exported fields. NewHTMLParser builds one from a
// base URL string and may return an error; Parse returns a *ParseResult.
type HTMLParser struct {
	// contains filtered or unexported fields
}

HTMLParser parses HTML documents to extract links and other elements.

func NewHTMLParser

func NewHTMLParser(baseURL string) (*HTMLParser, error)

NewHTMLParser creates a new HTML parser.

func (*HTMLParser) Parse

func (p *HTMLParser) Parse(html string) (*ParseResult, error)

Parse parses an HTML document.

type InputInfo

// InputInfo is the element type of FormInfo.Inputs. It carries every field of
// FormInput plus further ones (ID, Class, Disabled, Readonly, Min, Max, Step,
// Multiple, Accept, Autocomplete).
//
// MinLength and MaxLength are ints, while Min, Max and Step are kept as
// strings.
//
// NOTE(review): the fields presumably mirror the HTML input attributes of the
// same names; confirm against the HTML parser.
type InputInfo struct {
	Name         string
	Type         string
	Value        string
	ID           string
	Class        string
	Required     bool
	Disabled     bool
	Readonly     bool
	Placeholder  string
	Pattern      string
	MinLength    int
	MaxLength    int
	Min          string
	Max          string
	Step         string
	Multiple     bool
	Accept       string
	Autocomplete string
}

InputInfo represents a form input.

type JSParseResult

// JSParseResult is the result type of JSParser.Parse. Its slices hold plain
// URL strings (URLs, WebSockets) and the structured types APIEndpoint,
// PotentialSecret, Route and FunctionInfo.
type JSParseResult struct {
	URLs         []string
	APIEndpoints []APIEndpoint
	WebSockets   []string
	Secrets      []PotentialSecret
	Routes       []Route
	Functions    []FunctionInfo
}

JSParseResult contains the result of JavaScript analysis.

type JSParser

// JSParser has no fields; NewJSParser returns a *JSParser and Parse returns a
// *JSParseResult.
type JSParser struct{}

JSParser performs static analysis on JavaScript code.

func NewJSParser

func NewJSParser() *JSParser

NewJSParser creates a new JavaScript parser.

func (*JSParser) Parse

func (p *JSParser) Parse(js string) *JSParseResult

Parse analyzes JavaScript code.

type Link

// Link is the element type of ParseResult.Links.
//
// NOTE(review): NoFollow is presumably derived from the Rel value; confirm
// against the HTML parser.
type Link struct {
	URL      string
	Text     string
	Rel      string
	Target   string
	NoFollow bool
}

Link represents a parsed link.

type Parameter

// Parameter is the element type of Endpoint.Parameters.
type Parameter struct {
	Name     string
	Type     string
	Example  string
	Required bool
}

Parameter represents a request parameter.

type ParseResult

// ParseResult is the result type of HTMLParser.Parse. Links and Forms are
// structured (Link, FormInfo); the remaining slices are plain strings and
// Meta is a string-to-string map.
//
// NOTE(review): whether the string slices hold resolved absolute URLs or raw
// attribute values is not visible here; confirm against the HTML parser.
type ParseResult struct {
	Links       []Link
	Forms       []FormInfo
	Scripts     []string
	Stylesheets []string
	Images      []string
	Iframes     []string
	Meta        map[string]string
	Comments    []string
}

ParseResult contains the result of parsing an HTML document.

type PotentialSecret

// PotentialSecret is the element type of JSParseResult.Secrets.
//
// NOTE(review): Value presumably holds the matched secret text itself, so
// results may need care when logged or stored; confirm against the JS parser.
type PotentialSecret struct {
	Type    string
	Value   string
	Line    int
	Context string
}

PotentialSecret represents a potential secret in code.

type Route

// Route is the element type of JSParseResult.Routes.
type Route struct {
	Path      string
	Component string
}

Route represents a client-side route.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL