Documentation
¶
Index ¶
- Constants
- Variables
- type Action
- type BaseAction
- type Client
- type ClientOptions
- type Fetcher
- type HTTPFetcher
- type HTTPFetcherOptions
- type Link
- type Meta
- type Metadata
- type MockFetcher
- type PDFAction
- type PDFActionOptions
- type Request
- type Response
- type ScreenshotAction
- type ScreenshotActionOptions
- type TypedAction
- type WaitAction
- type WaitActionOptions
Constants ¶
// Package-wide default limits.
const (
	// DefaultMaxBodySize is 10 MB expressed in bytes.
	DefaultMaxBodySize = 10 * 1024 * 1024 // 10 MB
	// DefaultTimeout is 30 seconds.
	DefaultTimeout = 30 * time.Second
)
// FakeUserAgent is a Firefox 133 on macOS User-Agent string that may be used
// to mimic a real browser.
const FakeUserAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:133.0) Gecko/20100101 Firefox/133.0"
FakeUserAgent may be used to mimic a real browser.
Variables ¶
var ( DefaultHTTPClient = &http.Client{Timeout: DefaultTimeout} DefaultHeaders = map[string]string{} )
// FakeHeaders is a set of browser-like request headers, including FakeUserAgent
// as the User-Agent, that may be used to mimic a real browser.
var FakeHeaders = map[string]string{
	"Accept":                    "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
	"Accept-Language":           "en-US,en;q=0.5",
	"Connection":                "keep-alive",
	"Dnt":                       "1",
	"Sec-Fetch-Dest":            "document",
	"Sec-Fetch-Mode":            "navigate",
	"Sec-Fetch-Site":            "cross-site",
	"Upgrade-Insecure-Requests": "1",
	"User-Agent":                FakeUserAgent,
	"Priority":                  "u=0, i",
}
FakeHeaders may be used to mimic a real browser.
Functions ¶
This section is empty.
Types ¶
type Action ¶
// Action wraps a TypedAction so that polymorphic actions can be marshaled to
// and unmarshaled from JSON via its custom MarshalJSON/UnmarshalJSON methods.
type Action struct {
// Action holds the wrapped action, exposed through the TypedAction interface.
Action TypedAction
}
Action is used for JSON marshaling/unmarshaling of polymorphic actions.
func NewPDFAction ¶
func NewPDFAction(options PDFActionOptions) Action
NewPDFAction creates a new PDF action.
func NewScreenshotAction ¶
func NewScreenshotAction(options ScreenshotActionOptions) Action
NewScreenshotAction creates a new screenshot action.
func NewWaitAction ¶
func NewWaitAction(options WaitActionOptions) Action
NewWaitAction creates a new wait action.
func (*Action) MarshalJSON ¶
MarshalJSON implements custom marshaling for polymorphic actions.
func (*Action) UnmarshalJSON ¶
UnmarshalJSON implements custom unmarshaling for polymorphic actions.
type BaseAction ¶
// BaseAction contains the fields common to all actions.
type BaseAction struct {
// Type is encoded under the JSON key "type".
// NOTE(review): presumably the value returned by GetType — confirm.
Type string `json:"type"`
}
BaseAction contains common fields for all actions.
func (BaseAction) GetType ¶
func (a BaseAction) GetType() string
type Client ¶
// Client is a client for fetching pages via a remote proxy. Create one with
// NewClient.
type Client struct {
// contains filtered or unexported fields
}
Client defines a client for fetching pages via a remote proxy.
func NewClient ¶
func NewClient(options ClientOptions) *Client
NewClient creates a new client with the given options.
type ClientOptions ¶
// ClientOptions defines the options accepted by NewClient. Every field is
// optional.
type ClientOptions struct {
BaseURL string // Optional proxy base URL
AuthToken string // Optional authorization token
Timeout time.Duration // Optional HTTP timeout
Headers map[string]string // Optional HTTP headers
}
ClientOptions defines the options for the client.
type Fetcher ¶
// Fetcher defines an interface for fetching pages.
type Fetcher interface {
// Fetch fetches the webpage described by request and returns the response,
// or an error.
Fetch(ctx context.Context, request *Request) (*Response, error)
}
Fetcher defines an interface for fetching pages.
type HTTPFetcher ¶
// HTTPFetcher implements the Fetcher interface using a standard HTTP client.
// Create one with NewHTTPFetcher.
type HTTPFetcher struct {
// contains filtered or unexported fields
}
HTTPFetcher implements the Fetcher interface using standard HTTP client.
func NewHTTPFetcher ¶
func NewHTTPFetcher(options HTTPFetcherOptions) *HTTPFetcher
NewHTTPFetcher creates a new HTTP fetcher.
type HTTPFetcherOptions ¶
// HTTPFetcherOptions defines the options accepted by NewHTTPFetcher.
type HTTPFetcherOptions struct {
// Timeout is a duration for the fetcher.
// NOTE(review): presumably the HTTP timeout, falling back to DefaultTimeout
// when zero — confirm.
Timeout time.Duration
// Headers is a map of header names to values.
// NOTE(review): presumably sent with every request — confirm.
Headers map[string]string
// Client is the HTTP client to use.
// NOTE(review): presumably DefaultHTTPClient is used when nil — confirm.
Client *http.Client
// MaxBodySize is a size limit.
// NOTE(review): presumably the maximum response body size in bytes,
// defaulting to DefaultMaxBodySize — confirm.
MaxBodySize int64
}
HTTPFetcherOptions defines the options for the HTTP fetcher.
type MockFetcher ¶
MockFetcher implements the Fetcher interface for testing.
func NewMockFetcher ¶
func NewMockFetcher() *MockFetcher
func (*MockFetcher) AddError ¶
func (m *MockFetcher) AddError(url string, err error)
func (*MockFetcher) AddResponse ¶
func (m *MockFetcher) AddResponse(url string, response *Response)
type PDFAction ¶
// PDFAction generates a PDF of the page.
type PDFAction struct {
// BaseAction supplies the shared Type field and the GetType method.
BaseAction
Format string `json:"format,omitempty"` // A4, Letter, Legal, etc.
}
PDFAction generates a PDF of the page.
type PDFActionOptions ¶
// PDFActionOptions represents the options for a PDF action, as passed to
// NewPDFAction.
type PDFActionOptions struct {
Format string `json:"format,omitempty"` // A4, Letter, Legal, etc.
}
PDFActionOptions represents the options for a PDF action.
type Request ¶
// Request defines the JSON payload for fetch requests.
//
// Every field except URL is tagged omitempty, so zero-valued fields are left
// out of the encoded JSON.
type Request struct {
// URL is always encoded, under the key "url".
URL string `json:"url"`
OnlyMainContent bool `json:"only_main_content,omitempty"`
IncludeTags []string `json:"include_tags,omitempty"`
ExcludeTags []string `json:"exclude_tags,omitempty"`
MaxAge int `json:"max_age,omitempty"` // milliseconds
Timeout int `json:"timeout,omitempty"` // milliseconds
WaitFor int `json:"wait_for,omitempty"` // milliseconds
Fetcher string `json:"fetcher,omitempty"`
Mobile bool `json:"mobile,omitempty"`
Prettify bool `json:"prettify,omitempty"`
Formats []string `json:"formats,omitempty"`
// Actions are encoded through Action's custom JSON marshaling.
Actions []Action `json:"actions,omitempty"`
Headers map[string]string `json:"headers,omitempty"`
StorageState map[string]any `json:"storage_state,omitempty"`
}
Request defines the JSON payload for fetch requests.
func ParseGetRequest ¶
ParseGetRequest parses a fetch.Request from a GET request and its query parameters.
type Response ¶
// Response defines the JSON payload for fetch responses.
//
// URL, StatusCode and Headers are always encoded; the remaining fields carry
// omitempty or omitzero tags.
type Response struct {
URL string `json:"url"`
StatusCode int `json:"status_code"`
Headers map[string]string `json:"headers"`
HTML string `json:"html,omitempty"`
Markdown string `json:"markdown,omitempty"`
Screenshot string `json:"screenshot,omitempty"`
PDF string `json:"pdf,omitempty"`
Error string `json:"error,omitempty"`
// NOTE(review): if Metadata is a struct type, omitempty has no effect on it
// in encoding/json; omitzero may be what is intended — confirm.
Metadata Metadata `json:"metadata,omitempty"`
Links []*Link `json:"links,omitempty"`
StorageState map[string]any `json:"storage_state,omitempty"`
// Timestamp is left out of the JSON when it is the zero time (omitzero).
Timestamp time.Time `json:"timestamp,omitzero"`
}
Response defines the JSON payload for fetch responses.
func ProcessRequest ¶
ProcessRequest applies the request options, including any requested transformations, to the given HTML content and builds the corresponding response. This is a reference implementation and may not be suitable for every use case.
type ScreenshotAction ¶
// ScreenshotAction triggers a screenshot of the page.
type ScreenshotAction struct {
// BaseAction supplies the shared Type field and the GetType method.
BaseAction
// FullPage is encoded as "full_page" and omitted when false.
FullPage bool `json:"full_page,omitempty"`
}
ScreenshotAction triggers a screenshot of the page.
type ScreenshotActionOptions ¶
// ScreenshotActionOptions represents the options for a screenshot action, as
// passed to NewScreenshotAction.
type ScreenshotActionOptions struct {
// FullPage is encoded as "full_page" and omitted when false.
FullPage bool `json:"full_page,omitempty"`
}
ScreenshotActionOptions represents the options for a screenshot action.
type TypedAction ¶
// TypedAction represents an action to be taken on a page.
type TypedAction interface {
// GetType reports the action's type as a string.
GetType() string
}
TypedAction represents an action to be taken on a page.
type WaitAction ¶
// WaitAction waits for a condition or for a period of time.
type WaitAction struct {
// BaseAction supplies the shared Type field and the GetType method.
BaseAction
Selector string `json:"selector,omitempty"` // Wait for element to appear
Duration int `json:"duration,omitempty"` // Wait for specific duration in milliseconds
}
WaitAction waits for a condition or for a period of time.
type WaitActionOptions ¶
// WaitActionOptions represents the options for a wait action, as passed to
// NewWaitAction.
type WaitActionOptions struct {
Selector string `json:"selector,omitempty"` // Wait for element to appear
Duration int `json:"duration,omitempty"` // Wait for specific duration in milliseconds
}
WaitActionOptions represents the options for a wait action.