browser

package
v1.0.0 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 1, 2026 License: MIT Imports: 15 Imported by: 0

Documentation

Overview

Package browser provides headless Chrome integration via Rod.

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func ExtractAJAXURLsFromJS

func ExtractAJAXURLsFromJS(jsContent string) []string

ExtractAJAXURLsFromJS extracts AJAX URLs from JavaScript content.

func IsNonRoutingHash

func IsNonRoutingHash(hash string) bool

IsNonRoutingHash checks if a hash change is likely non-routing (UI state).

func NormalizeHashURL

func NormalizeHashURL(rawURL string) string

NormalizeHashURL normalizes a hash-based URL for deduplication.

Types

type AJAXEndpoint

type AJAXEndpoint struct {
	URL         string
	Method      string
	ContentType string
	Parameters  []string
	Source      string // click, scroll, form, script, etc.
	Trigger     string // Element or event that triggered the request
}

AJAXEndpoint represents a discovered AJAX endpoint.

type AJAXForm

type AJAXForm struct {
	FormID      string
	FormName    string
	Action      string
	Method      string
	SubmitType  string // jquery, fetch, xhr, axios
	Inputs      []InputData
	CallbackURL string
}

AJAXForm represents a form that submits via AJAX.

type AJAXHandler

type AJAXHandler struct {
	// contains filtered or unexported fields
}

AJAXHandler provides comprehensive AJAX handling for web pages.

func NewAJAXHandler

func NewAJAXHandler() *AJAXHandler

NewAJAXHandler creates a new AJAX handler.

func (*AJAXHandler) AnalyzeAJAX

func (h *AJAXHandler) AnalyzeAJAX(page *rod.Page) *AJAXResult

AnalyzeAJAX performs comprehensive AJAX analysis on a page.

func (*AJAXHandler) ExtractAJAXEndpoints

func (h *AJAXHandler) ExtractAJAXEndpoints(page *rod.Page) []AJAXEndpoint

ExtractAJAXEndpoints extracts AJAX endpoints from JavaScript code.

func (*AJAXHandler) ExtractAJAXForms

func (h *AJAXHandler) ExtractAJAXForms(page *rod.Page) []AJAXForm

ExtractAJAXForms finds forms that submit via AJAX.

func (*AJAXHandler) GetCapturedRequests

func (h *AJAXHandler) GetCapturedRequests(page *rod.Page) []NetworkRequest

GetCapturedRequests retrieves all captured AJAX requests from the page.

func (*AJAXHandler) InjectAJAXInterceptor

func (h *AJAXHandler) InjectAJAXInterceptor(page *rod.Page) error

InjectAJAXInterceptor injects JavaScript to intercept all AJAX calls.

func (*AJAXHandler) MonitorDynamicContent

func (h *AJAXHandler) MonitorDynamicContent(page *rod.Page, duration time.Duration) []string

MonitorDynamicContent monitors for dynamically loaded content.

func (*AJAXHandler) TriggerAJAXEvents

func (h *AJAXHandler) TriggerAJAXEvents(page *rod.Page) error

TriggerAJAXEvents triggers common events that might cause AJAX requests.

func (*AJAXHandler) WaitForAJAX

func (h *AJAXHandler) WaitForAJAX(page *rod.Page, timeout time.Duration) error

WaitForAJAX waits for pending AJAX requests to complete.

type AJAXResult

type AJAXResult struct {
	Requests       []NetworkRequest
	AJAXEndpoints  []AJAXEndpoint
	AJAXForms      []AJAXForm
	DynamicContent []string
}

AJAXResult contains results from AJAX analysis.

type Browser

type Browser struct {
	// contains filtered or unexported fields
}

Browser wraps a Rod browser instance.

func New

func New(config Config) (*Browser, error)

New creates a new browser instance.

func (*Browser) Close

func (b *Browser) Close() error

Close closes the browser.

func (*Browser) GetConfig

func (b *Browser) GetConfig() Config

GetConfig returns the browser configuration.

func (*Browser) NeedsRecycle

func (b *Browser) NeedsRecycle() bool

NeedsRecycle checks if the browser needs recycling.

func (*Browser) PageCount

func (b *Browser) PageCount() int

PageCount returns the number of pages visited.

func (*Browser) Visit

func (b *Browser) Visit(ctx context.Context, url string, headers map[string]string, cookies []*http.Cookie) (*PageResult, error)

Visit navigates to a URL and extracts page data.

func (*Browser) VisitHashRoute

func (b *Browser) VisitHashRoute(ctx context.Context, baseURL string, hashRoute string, headers map[string]string, cookies []*http.Cookie) (*PageResult, error)

VisitHashRoute navigates to a hash-based route within an existing page. This is used for SPA navigation where the hash change doesn't reload the page.

func (*Browser) VisitHashRouteWithOptions

func (b *Browser) VisitHashRouteWithOptions(ctx context.Context, baseURL string, hashRoute string, headers map[string]string, cookies []*http.Cookie, opts VisitOptions) (*PageResult, error)

VisitHashRouteWithOptions navigates to a hash-based route with options.

func (*Browser) VisitWithOptions

func (b *Browser) VisitWithOptions(ctx context.Context, url string, headers map[string]string, cookies []*http.Cookie, opts VisitOptions) (*PageResult, error)

VisitWithOptions navigates to a URL with custom options.

type Config

type Config struct {
	PoolSize          int           `json:"pool_size"`
	Headless          bool          `json:"headless"`
	Timeout           time.Duration `json:"timeout"`
	UserAgent         string        `json:"user_agent"`
	ViewportWidth     int           `json:"viewport_width"`
	ViewportHeight    int           `json:"viewport_height"`
	RecycleAfter      int           `json:"recycle_after"`
	IgnoreHTTPSErrors bool          `json:"ignore_https_errors"`
	FastMode          bool          `json:"fast_mode"` // Skip heavy analysis for speed
}

Config defines browser configuration.

func DefaultConfig

func DefaultConfig() Config

DefaultConfig returns default browser configuration.

type FormData

type FormData struct {
	Action  string
	Method  string
	Enctype string
	ID      string
	Name    string
	Inputs  []InputData
}

FormData represents form data extracted from a page.

type InputData

type InputData struct {
	Name        string
	Type        string
	Value       string
	Required    bool
	Placeholder string
	Pattern     string
	MaxLength   int
	MinLength   int
}

InputData represents form input data.

type Interceptor

type Interceptor struct {
	// contains filtered or unexported fields
}

Interceptor captures network requests during page loads.

func NewInterceptor

func NewInterceptor() *Interceptor

NewInterceptor creates a new request interceptor.

func (*Interceptor) AddFilter

func (i *Interceptor) AddFilter(filter RequestFilter)

AddFilter adds a filter for capturing specific requests.

func (*Interceptor) Clear

func (i *Interceptor) Clear()

Clear clears all captured requests.

func (*Interceptor) GetAPIRequests

func (i *Interceptor) GetAPIRequests() []NetworkRequest

GetAPIRequests returns requests that look like API calls.

func (*Interceptor) GetRequests

func (i *Interceptor) GetRequests() []NetworkRequest

GetRequests returns all captured requests.

func (*Interceptor) GetTimeline

func (i *Interceptor) GetTimeline() *RequestTimeline

GetTimeline returns the captured requests as a RequestTimeline, together with the timeline's start and end times.

func (*Interceptor) GroupByEndpoint

func (i *Interceptor) GroupByEndpoint() []RequestGroup

GroupByEndpoint groups requests by their endpoint.

func (*Interceptor) Record

func (i *Interceptor) Record(req NetworkRequest)

Record records a network request.

func (*Interceptor) Stats

func (i *Interceptor) Stats() InterceptorStats

Stats returns interceptor statistics.

type InterceptorStats

type InterceptorStats struct {
	TotalRequests int            `json:"total_requests"`
	APIRequests   int            `json:"api_requests"`
	ByType        map[string]int `json:"by_type"`
	ByMethod      map[string]int `json:"by_method"`
}

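InterceptorStats contains interceptor statistics: the total number of requests, the number of API requests, and request counts by type and by method.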

type NetworkRequest

type NetworkRequest struct {
	URL          string
	Method       string
	Headers      map[string]string
	PostData     string
	ResourceType string
	Timestamp    time.Time
}

NetworkRequest represents an intercepted network request.

type PageResult

type PageResult struct {
	URL             string
	FinalURL        string
	StatusCode      int
	ContentType     string
	HTML            string
	Title           string
	Links           []string
	Scripts         []string
	Forms           []FormData
	XHRRequests     []NetworkRequest
	WebSockets      []string
	Cookies         []*http.Cookie
	ResponseTime    time.Duration
	Error           error
	Framework       *framework.DetectionResult
	FrameworkRoutes []framework.Route
	FrameworkLinks  []framework.Link
	// AJAX-specific results
	AJAXEndpoints  []AJAXEndpoint
	AJAXForms      []AJAXForm
	DynamicContent []string

	// SPA-specific results
	ContentHash    string   // Hash of page content for dedup
	IsSoftError    bool     // True if page shows error content with 200 status
	SoftErrorMsg   string   // Error message if soft error detected
	ShadowDOMLinks []string // Links extracted from Shadow DOM
	IsAuthPage     bool     // True if redirected to auth page
	AuthURL        string   // URL of auth page if redirected
}

PageResult contains the result of a page visit.

type Pool

type Pool struct {
	// contains filtered or unexported fields
}

Pool manages a pool of browser instances.

func NewPool

func NewPool(config Config) (*Pool, error)

NewPool creates a new browser pool.

func (*Pool) Acquire

func (p *Pool) Acquire(ctx context.Context) (*Browser, error)

Acquire gets a browser from the pool.

func (*Pool) Close

func (p *Pool) Close() error

Close closes all browsers in the pool.

func (*Pool) Release

func (p *Pool) Release(browser *Browser)

Release returns a browser to the pool.

func (*Pool) Size

func (p *Pool) Size() int

Size returns the pool size.

func (*Pool) Stats

func (p *Pool) Stats() PoolStats

Stats returns pool statistics.

func (*Pool) Visit

func (p *Pool) Visit(ctx context.Context, url string, headers map[string]string, cookies []*http.Cookie) (*PageResult, error)

Visit acquires a browser, visits the URL, and releases it.

func (*Pool) VisitHashRoute

func (p *Pool) VisitHashRoute(ctx context.Context, baseURL string, hashRoute string, headers map[string]string, cookies []*http.Cookie) (*PageResult, error)

VisitHashRoute visits a hash-based route within an SPA.

func (*Pool) VisitHashRouteWithOptions

func (p *Pool) VisitHashRouteWithOptions(ctx context.Context, baseURL string, hashRoute string, headers map[string]string, cookies []*http.Cookie, opts VisitOptions) (*PageResult, error)

VisitHashRouteWithOptions visits a hash-based route with options.

func (*Pool) VisitWithOptions

func (p *Pool) VisitWithOptions(ctx context.Context, url string, headers map[string]string, cookies []*http.Cookie, opts VisitOptions) (*PageResult, error)

VisitWithOptions acquires a browser, visits the URL with options, and releases it.

type PoolStats

type PoolStats struct {
	Size       int `json:"size"`
	Available  int `json:"available"`
	TotalPages int `json:"total_pages"`
}

PoolStats contains pool statistics: the pool size, the number of available browsers, and the total number of pages.

type RequestFilter

type RequestFilter struct {
	URLPattern   string
	ResourceType string
	Method       string
}

RequestFilter defines a filter for network requests.

type RequestGroup

type RequestGroup struct {
	Endpoint   string
	Method     string
	Requests   []NetworkRequest
	Parameters map[string][]string
}

RequestGroup groups requests by endpoint.

type RequestTimeline

type RequestTimeline struct {
	Start    time.Time
	End      time.Time
	Requests []NetworkRequest
}

RequestTimeline represents requests over time.

type SPAConfig

type SPAConfig struct {
	// Maximum wait time for content to load
	MaxWaitTime time.Duration

	// Minimum content length to consider page loaded
	MinContentLength int

	// Selectors that indicate loading state
	LoadingSelectors []string

	// Selectors that indicate content is ready
	ReadySelectors []string

	// Enable content hash deduplication
	EnableContentDedup bool

	// Maximum retries for rendering
	MaxRetries int

	// Enable stealth mode (anti-detection)
	StealthMode bool
}

SPAConfig contains SPA handling configuration.

func DefaultSPAConfig

func DefaultSPAConfig() SPAConfig

DefaultSPAConfig returns sensible defaults for SPA handling.

type SPAHandler

type SPAHandler struct {
	// contains filtered or unexported fields
}

SPAHandler handles Single Page Application specific challenges.

func NewSPAHandler

func NewSPAHandler(config SPAConfig) *SPAHandler

NewSPAHandler creates a new SPA handler.

func (*SPAHandler) ApplyStealthMode

func (h *SPAHandler) ApplyStealthMode(page *rod.Page) error

ApplyStealthMode applies anti-detection measures.

func (*SPAHandler) DetectMicroFrontends

func (h *SPAHandler) DetectMicroFrontends(page *rod.Page) ([]string, error)

DetectMicroFrontends detects multiple Angular/SPA instances.

func (*SPAHandler) ExtractShadowDOMContent

func (h *SPAHandler) ExtractShadowDOMContent(page *rod.Page) ([]string, error)

ExtractShadowDOMContent extracts content from Shadow DOM elements.

func (*SPAHandler) GetContentHash

func (h *SPAHandler) GetContentHash(page *rod.Page) (string, error)

GetContentHash returns a hash of the page's meaningful content.

func (*SPAHandler) HandleAuthRedirect

func (h *SPAHandler) HandleAuthRedirect(page *rod.Page) (bool, string)

HandleAuthRedirect detects and handles auth redirects.

func (*SPAHandler) HandleInfiniteScroll

func (h *SPAHandler) HandleInfiniteScroll(page *rod.Page, maxScrolls int, scrollDelay time.Duration) ([]string, error)

HandleInfiniteScroll handles infinite scroll pages with limits.

func (*SPAHandler) InjectNetworkMonitor

func (h *SPAHandler) InjectNetworkMonitor(page *rod.Page) error

InjectNetworkMonitor injects a network request monitor.

func (*SPAHandler) IsSoftError

func (h *SPAHandler) IsSoftError(page *rod.Page) (bool, string)

IsSoftError checks if the page shows a soft error (404 page with 200 status).

func (*SPAHandler) RecoverFromHang

func (h *SPAHandler) RecoverFromHang(page *rod.Page) error

RecoverFromHang attempts to recover from a stuck page.

func (*SPAHandler) SetupPageErrorHandling

func (h *SPAHandler) SetupPageErrorHandling(page *rod.Page)

SetupPageErrorHandling sets up error handlers for the page.

func (*SPAHandler) WaitForContent

func (h *SPAHandler) WaitForContent(page *rod.Page) error

WaitForContent waits for SPA content to be fully loaded.

type VisitOptions

type VisitOptions struct {
	FastMode       bool          // Skip SPA framework detection and AJAX analysis
	SPAMode        bool          // Enable advanced SPA handling (content wait, stealth, etc.)
	MaxWaitTime    time.Duration // Maximum wait time for SPA content
	EnableStealth  bool          // Enable anti-detection measures
	CheckSoftError bool          // Check for soft 404 errors
}

VisitOptions contains options for a single page visit.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL