Documentation
¶
Index ¶
- Constants
- func CombineHeaders(presets ...map[string]string) map[string]string
- func DMMHeaders() map[string]string
- func DrainAndClose(body io.ReadCloser) error
- func IsValidCookieName(name string) bool
- func JSONAPIHeaders() map[string]string
- func JapaneseLanguageHeaders() map[string]string
- func MergeCookieHeader(existing, new map[string]string) string
- func NewHTTPClient(proxyProfile *config.ProxyProfile, timeout time.Duration) (*http.Client, error)
- func NewRestyClient(proxyProfile *config.ProxyProfile, timeout time.Duration, retries int) (*resty.Client, error)
- func NewRestyClientNoProxy(timeout time.Duration, retries int) *resty.Client
- func NewScraperHTTPClient(cfg *config.ScraperSettings, globalProxy *config.ProxyConfig, ...) (*resty.Client, error)
- func NewTransport(proxyProfile *config.ProxyProfile) (*http.Transport, error)
- func R18DevHeaders() map[string]string
- func RefererHeader(url string) map[string]string
- func SanitizeCookieValue(value string) string
- func SanitizeProxyURL(proxyURL string) string
- func StandardHTMLHeaders() map[string]string
- func UserAgentHeader(ua string) map[string]string
- type FlareSolverr
- func GetFlareSolverrFromClient(client *resty.Client) (*FlareSolverr, bool)
- func NewFlareSolverr(cfg *config.FlareSolverrConfig) (*FlareSolverr, error)
- func NewRestyClientWithFlareSolverr(proxyProfile *config.ProxyProfile, flaresolverrCfg config.FlareSolverrConfig, ...) (*resty.Client, *FlareSolverr, error)
- func (fs *FlareSolverr) Close() error
- func (fs *FlareSolverr) CreateSession() (string, error)
- func (fs *FlareSolverr) DestroySession(sessionID string) error
- func (fs *FlareSolverr) ResolveURL(targetURL string) (string, []http.Cookie, error)
- func (fs *FlareSolverr) ResolveURLWithSession(targetURL, sessionID string) (string, []http.Cookie, error)
- type FlareSolverrProxy
- type FlareSolverrRequest
- type FlareSolverrResponse
- type FlareSolverrSession
- type HTTPClient
- type ScraperClient
- type ScraperClientBuilder
- func (b *ScraperClientBuilder) Apply(opts ...ScraperOption) *ScraperClientBuilder
- func (b *ScraperClientBuilder) Build() (*ScraperClient, error)
- func (b *ScraperClientBuilder) BuildClient() (*resty.Client, error)
- func (b *ScraperClientBuilder) BuildWithFlareSolverr() (*resty.Client, *FlareSolverr, error)
- func (b *ScraperClientBuilder) BuildWithProxy() (*resty.Client, *config.ProxyProfile, error)
- type ScraperClientResult
- type ScraperHTTPClientOption
- type ScraperOption
- func WithCookies(cookies map[string]string) ScraperOption
- func WithFlareSolverr(enabled bool) ScraperOption
- func WithGlobalFlareSolverr(cfg config.FlareSolverrConfig) ScraperOption
- func WithGlobalProxy(global config.ProxyConfig) ScraperOption
- func WithHeader(key, value string) ScraperOption
- func WithHeaders(headers map[string]string) ScraperOption
- func WithProxyProfileReturn(enabled bool) ScraperOption
- func WithRetryCount(count int) ScraperOption
- func WithScraperProxy(scraper *config.ProxyConfig) ScraperOption
- func WithTimeout(timeout time.Duration) ScraperOption
Constants ¶
const ( DefaultTimeout = 30 * time.Second DefaultRetryCount = 3 )
Variables ¶
This section is empty.
Functions ¶
func DMMHeaders ¶
func DrainAndClose ¶
func DrainAndClose(body io.ReadCloser) error
func IsValidCookieName ¶
IsValidCookieName validates a cookie name against RFC 6265 token rules. Cookie names must be valid tokens: alphanumeric, dash, underscore, and a few special chars.
func JSONAPIHeaders ¶
func JapaneseLanguageHeaders ¶
func MergeCookieHeader ¶
func NewHTTPClient ¶
NewHTTPClient creates a standard http.Client with proxy support
func NewRestyClient ¶
func NewRestyClient(proxyProfile *config.ProxyProfile, timeout time.Duration, retries int) (*resty.Client, error)
NewRestyClient creates a resty.Client with proxy support
func NewRestyClientNoProxy ¶
NewRestyClientNoProxy creates a resty.Client that explicitly bypasses environment proxy variables by using a no-proxy transport.
func NewScraperHTTPClient ¶
func NewScraperHTTPClient(cfg *config.ScraperSettings, globalProxy *config.ProxyConfig, globalFlareSolverr config.FlareSolverrConfig, opts ...ScraperHTTPClientOption) (*resty.Client, error)
func NewTransport ¶
func NewTransport(proxyProfile *config.ProxyProfile) (*http.Transport, error)
NewTransport creates an http.Transport with optional proxy support
func R18DevHeaders ¶
func RefererHeader ¶
func SanitizeCookieValue ¶
SanitizeCookieValue removes characters forbidden in RFC 6265 cookie values. Prevents header injection and ensures parsing stability.
func SanitizeProxyURL ¶
SanitizeProxyURL removes credentials from proxy URL for safe logging
func StandardHTMLHeaders ¶
func UserAgentHeader ¶
Types ¶
type FlareSolverr ¶
type FlareSolverr struct {
// contains filtered or unexported fields
}
FlareSolverr represents the FlareSolverr client
func GetFlareSolverrFromClient ¶
func GetFlareSolverrFromClient(client *resty.Client) (*FlareSolverr, bool)
GetFlareSolverrFromClient extracts the FlareSolverr instance from the resty client context. Note: This is a helper for scrapers that need to access FlareSolverr. The FlareSolverr instance is typically stored separately and passed to scrapers.
func NewFlareSolverr ¶
func NewFlareSolverr(cfg *config.FlareSolverrConfig) (*FlareSolverr, error)
NewFlareSolverr creates a new FlareSolverr client
func NewRestyClientWithFlareSolverr ¶
func NewRestyClientWithFlareSolverr(proxyProfile *config.ProxyProfile, flaresolverrCfg config.FlareSolverrConfig, timeout time.Duration, retries int) (*resty.Client, *FlareSolverr, error)
NewRestyClientWithFlareSolverr creates a resty.Client with optional FlareSolverr support. Note: The FlareSolverr config is passed separately since it lives at ScrapersConfig.FlareSolverr (top-level), not inside ProxyConfig (which only holds proxy settings).
func (*FlareSolverr) Close ¶
func (fs *FlareSolverr) Close() error
Close cleans up resources held by the FlareSolverr. Currently a no-op since FlareSolverr uses stateless HTTP requests, but provides a cleanup hook for future resource management.
func (*FlareSolverr) CreateSession ¶
func (fs *FlareSolverr) CreateSession() (string, error)
CreateSession creates a new FlareSolverr session for cookie persistence
func (*FlareSolverr) DestroySession ¶
func (fs *FlareSolverr) DestroySession(sessionID string) error
DestroySession destroys a FlareSolverr session via HTTP and removes it from the local cache.
func (*FlareSolverr) ResolveURL ¶
ResolveURL resolves a URL through FlareSolverr, returning HTML content and cookies. The mutex is held for the entire operation to ensure session reuse is safe from concurrent reset calls.
func (*FlareSolverr) ResolveURLWithSession ¶
func (fs *FlareSolverr) ResolveURLWithSession(targetURL, sessionID string) (string, []http.Cookie, error)
ResolveURLWithSession resolves a URL using a specific session
type FlareSolverrProxy ¶
type FlareSolverrProxy struct {
URL string `json:"url"`
Username string `json:"username,omitempty"`
Password string `json:"password,omitempty"`
}
FlareSolverrProxy represents a per-request proxy configuration passed to FlareSolverr. This is used for the target URL request made by FlareSolverr, not for calls to FlareSolverr itself.
type FlareSolverrRequest ¶
type FlareSolverrRequest struct {
Cmd string `json:"cmd"` // "request.get" or "sessions.create"
URL string `json:"url"` // Target URL
MaxTimeout int `json:"maxTimeout"` // Timeout in milliseconds (FlareSolverr expects ms)
Session string `json:"session"` // Optional: reuse existing session
SessionTTLMinutes int `json:"session_ttl_minutes,omitempty"` // Optional: rotate existing session when older than TTL
Proxy *FlareSolverrProxy `json:"proxy,omitempty"` // Optional: proxy for target URL request
}
FlareSolverrRequest represents a request to FlareSolverr
type FlareSolverrResponse ¶
type FlareSolverrResponse struct {
Status string `json:"status"`
Message string `json:"message"`
Solution struct {
Response string `json:"response"`
Cookies []struct {
Name string `json:"name"`
Value string `json:"value"`
} `json:"cookies"`
UserAgent string `json:"userAgent"`
} `json:"solution"`
Session string `json:"session"`
}
FlareSolverrResponse represents a FlareSolverr response
type FlareSolverrSession ¶
type FlareSolverrSession struct {
Token string
Created time.Time
URLs []string
Cookies []http.Cookie
}
FlareSolverrSession represents a FlareSolverr session
type HTTPClient ¶
type HTTPClient interface {
// Do executes an HTTP request and returns the response
Do(req *http.Request) (*http.Response, error)
}
HTTPClient defines the interface for HTTP operations. This allows for easy mocking in tests.
type ScraperClient ¶
type ScraperClient struct {
Client *resty.Client
FlareSolverr *FlareSolverr
ProxyProfile *config.ProxyProfile
}
type ScraperClientBuilder ¶
type ScraperClientBuilder struct {
// contains filtered or unexported fields
}
func FromScraperSettings ¶
func FromScraperSettings(settings *config.ScraperSettings, globalProxy *config.ProxyConfig, globalFlareSolverr config.FlareSolverrConfig, opts ...ScraperOption) *ScraperClientBuilder
func NewScraperClientBuilder ¶
func NewScraperClientBuilder() *ScraperClientBuilder
func (*ScraperClientBuilder) Apply ¶
func (b *ScraperClientBuilder) Apply(opts ...ScraperOption) *ScraperClientBuilder
func (*ScraperClientBuilder) Build ¶
func (b *ScraperClientBuilder) Build() (*ScraperClient, error)
func (*ScraperClientBuilder) BuildClient ¶
func (b *ScraperClientBuilder) BuildClient() (*resty.Client, error)
func (*ScraperClientBuilder) BuildWithFlareSolverr ¶
func (b *ScraperClientBuilder) BuildWithFlareSolverr() (*resty.Client, *FlareSolverr, error)
func (*ScraperClientBuilder) BuildWithProxy ¶
func (b *ScraperClientBuilder) BuildWithProxy() (*resty.Client, *config.ProxyProfile, error)
type ScraperClientResult ¶
type ScraperClientResult struct {
Client *resty.Client
ProxyProfile *config.ProxyProfile
ProxyEnabled bool
}
func InitScraperClient ¶
func InitScraperClient(settings *config.ScraperSettings, globalProxy *config.ProxyConfig, globalFlareSolverr config.FlareSolverrConfig, opts ...ScraperHTTPClientOption) *ScraperClientResult
type ScraperHTTPClientOption ¶
type ScraperHTTPClientOption func(*scraperHTTPConfig)
func WithProxyProfile ¶
func WithProxyProfile() ScraperHTTPClientOption
func WithScraperCookies ¶
func WithScraperCookies(cookies map[string]string) ScraperHTTPClientOption
func WithScraperHeaders ¶
func WithScraperHeaders(headers map[string]string) ScraperHTTPClientOption
func WithScraperUserAgent ¶
func WithScraperUserAgent(ua string) ScraperHTTPClientOption
type ScraperOption ¶
type ScraperOption func(*scraperConfig)
func WithCookies ¶
func WithCookies(cookies map[string]string) ScraperOption
func WithFlareSolverr ¶
func WithFlareSolverr(enabled bool) ScraperOption
func WithGlobalFlareSolverr ¶
func WithGlobalFlareSolverr(cfg config.FlareSolverrConfig) ScraperOption
func WithGlobalProxy ¶
func WithGlobalProxy(global config.ProxyConfig) ScraperOption
func WithHeader ¶
func WithHeader(key, value string) ScraperOption
func WithHeaders ¶
func WithHeaders(headers map[string]string) ScraperOption
func WithProxyProfileReturn ¶
func WithProxyProfileReturn(enabled bool) ScraperOption
func WithRetryCount ¶
func WithRetryCount(count int) ScraperOption
func WithScraperProxy ¶
func WithScraperProxy(scraper *config.ProxyConfig) ScraperOption
func WithTimeout ¶
func WithTimeout(timeout time.Duration) ScraperOption