Documentation
¶
Index ¶
- Constants
- Variables
- func FetchWithBrowser(url string, timeout int, proxyProfile *config.ProxyProfile) (string, error)
- func GetExtraPlaceholderHashes(settings *config.ScraperSettings) []string
- func GetPlaceholderThreshold(settings *config.ScraperSettings) int
- func IsPlaceholder(ctx context.Context, client *resty.Client, url string, thresholdBytes int64, ...) (bool, error)
- func MergePlaceholderHashes(settings *config.ScraperSettings) []string
- func NewHTTPClient(cfg *config.ScraperSettings, globalProxy *config.ProxyConfig, ...) (*resty.Client, *config.ProxyProfile, error)
- type DMMConfig
- func (c *DMMConfig) GetDownloadProxy() any
- func (c *DMMConfig) GetMaxRetries() int
- func (c *DMMConfig) GetProxy() any
- func (c *DMMConfig) GetRequestDelay() int
- func (c *DMMConfig) GetUserAgent() string
- func (c *DMMConfig) IsEnabled() bool
- func (c *DMMConfig) ToScraperSettings() *config.ScraperSettings
- func (c *DMMConfig) ValidateConfig(sc *config.ScraperSettings) error
- type JSONLDAggregateRating
- type JSONLDBrand
- type JSONLDOffer
- type JSONLDProduct
- type JSONLDVideoObject
- type Scraper
- func (s *Scraper) CanHandleURL(rawURL string) bool
- func (s *Scraper) Close() error
- func (s *Scraper) Config() *config.ScraperSettings
- func (s *Scraper) ExtractIDFromURL(urlStr string) (string, error)
- func (s *Scraper) GetURL(id string) (string, error)
- func (s *Scraper) IsEnabled() bool
- func (s *Scraper) Name() string
- func (s *Scraper) ResolveContentID(id string) (string, error)
- func (s *Scraper) ResolveDownloadProxyForHost(host string) (*config.ProxyConfig, *config.ProxyConfig, bool)
- func (s *Scraper) ScrapeURL(url string) (*models.ScraperResult, error)
- func (s *Scraper) Search(id string) (*models.ScraperResult, error)
- func (s *Scraper) ValidateConfig(cfg *config.ScraperSettings) error
Constants ¶
const (
	ConfigKeyPlaceholderThreshold   = placeholder.ConfigKeyThreshold
	ConfigKeyExtraPlaceholderHashes = placeholder.ConfigKeyHashes
)
const DefaultPlaceholderThresholdKB = placeholder.DefaultThresholdKB
Variables ¶
var DefaultPlaceholderHashes = placeholder.DefaultDMMPlaceholderHashes
Functions ¶
func FetchWithBrowser ¶
FetchWithBrowser fetches a URL using Chrome browser automation with age verification cookies.
func GetExtraPlaceholderHashes ¶
func GetExtraPlaceholderHashes(settings *config.ScraperSettings) []string
func GetPlaceholderThreshold ¶
func GetPlaceholderThreshold(settings *config.ScraperSettings) int
func IsPlaceholder ¶
func MergePlaceholderHashes ¶
func MergePlaceholderHashes(settings *config.ScraperSettings) []string
func NewHTTPClient ¶
func NewHTTPClient(cfg *config.ScraperSettings, globalProxy *config.ProxyConfig, globalFlareSolverr config.FlareSolverrConfig) (*resty.Client, *config.ProxyProfile, error)
NewHTTPClient creates an HTTP client for the DMM scraper. HTTP-01: Per-scraper HTTP client ownership. Returns client, effective proxyProfile (for browser use), and error.
Types ¶
type DMMConfig ¶
type DMMConfig struct {
Enabled bool `yaml:"enabled" json:"enabled"`
RequestDelay int `yaml:"request_delay" json:"request_delay"`
MaxRetries int `yaml:"max_retries" json:"max_retries"`
UserAgent string `yaml:"user_agent" json:"user_agent"`
Proxy *config.ProxyConfig `yaml:"proxy,omitempty" json:"proxy,omitempty"`
DownloadProxy *config.ProxyConfig `yaml:"download_proxy,omitempty" json:"download_proxy,omitempty"`
Priority int `yaml:"priority" json:"priority"` // Scraper's priority (higher = higher priority)
// Per-scraper browser and scrape_actress settings
UseBrowser bool `yaml:"use_browser" json:"use_browser"`
ScrapeActress bool `yaml:"scrape_actress" json:"scrape_actress"`
// Placeholder detection settings
PlaceholderThresholdKB int `yaml:"placeholder_threshold" json:"placeholder_threshold"`
ExtraPlaceholderHashes []string `yaml:"extra_placeholder_hashes" json:"extra_placeholder_hashes"`
}
DMMConfig holds DMM/Fanza scraper configuration. YAML tags are defined here for unmarshaling via config.ScrapersConfig.
func (*DMMConfig) GetDownloadProxy ¶
GetDownloadProxy implements scraperutil.ScraperConfigInterface.
func (*DMMConfig) GetMaxRetries ¶
GetMaxRetries implements scraperutil.ScraperConfigInterface.
func (*DMMConfig) GetRequestDelay ¶
GetRequestDelay implements scraperutil.ScraperConfigInterface.
func (*DMMConfig) GetUserAgent ¶
GetUserAgent implements scraperutil.ScraperConfigInterface.
func (*DMMConfig) ToScraperSettings ¶
func (c *DMMConfig) ToScraperSettings() *config.ScraperSettings
ToScraperSettings converts DMMConfig to ScraperSettings, flowing placeholder settings to Extra map for runtime access by placeholder detection functions.
func (*DMMConfig) ValidateConfig ¶
func (c *DMMConfig) ValidateConfig(sc *config.ScraperSettings) error
ValidateConfig implements config.ConfigValidator for DMMConfig.
type JSONLDAggregateRating ¶
type JSONLDAggregateRating struct {
Type string `json:"@type"`
RatingValue float64 `json:"ratingValue"`
RatingCount int `json:"ratingCount"`
}
JSONLDAggregateRating represents the AggregateRating schema.
type JSONLDBrand ¶
JSONLDBrand represents the Brand schema.
type JSONLDOffer ¶
type JSONLDOffer struct {
Type string `json:"@type"`
Availability string `json:"availability"`
PriceCurrency string `json:"priceCurrency"`
Price float64 `json:"price"`
}
JSONLDOffer represents the Offer schema.
type JSONLDProduct ¶
type JSONLDProduct struct {
Context string `json:"@context"`
Type string `json:"@type"`
Name string `json:"name"`
Description string `json:"description"`
Image interface{} `json:"image"` // Can be string or array
SKU string `json:"sku"`
Brand *JSONLDBrand `json:"brand"`
SubjectOf *JSONLDVideoObject `json:"subjectOf"`
Offers *JSONLDOffer `json:"offers"`
AggregateRating *JSONLDAggregateRating `json:"aggregateRating"`
}
JSONLDProduct represents the Product schema from JSON-LD.
type JSONLDVideoObject ¶
type JSONLDVideoObject struct {
Type string `json:"@type"`
Name string `json:"name"`
Description string `json:"description"`
ContentURL string `json:"contentUrl"`
ThumbnailURL string `json:"thumbnailUrl"`
UploadDate string `json:"uploadDate"`
Genre []string `json:"genre"`
}
JSONLDVideoObject represents the VideoObject schema.
type Scraper ¶
type Scraper struct {
// contains filtered or unexported fields
}
Scraper implements the DMM/Fanza scraper.
func New ¶
func New(settings config.ScraperSettings, globalConfig *config.ScrapersConfig, contentIDRepo *database.ContentIDMappingRepository) *Scraper
func (*Scraper) CanHandleURL ¶
CanHandleURL returns true if this scraper can handle the given URL.
func (*Scraper) Config ¶
func (s *Scraper) Config() *config.ScraperSettings
Config returns the scraper's configuration.
func (*Scraper) ExtractIDFromURL ¶
ExtractIDFromURL extracts the movie ID from a DMM URL.
func (*Scraper) ResolveContentID ¶
ResolveContentID attempts to resolve the display ID to an actual DMM content ID by first checking the cache, then scraping DMM search if needed.
func (*Scraper) ResolveDownloadProxyForHost ¶
func (s *Scraper) ResolveDownloadProxyForHost(host string) (*config.ProxyConfig, *config.ProxyConfig, bool)
ResolveDownloadProxyForHost declares DMM-owned media hosts for downloader proxy routing.
func (*Scraper) ScrapeURL ¶
func (s *Scraper) ScrapeURL(url string) (*models.ScraperResult, error)
ScrapeURL directly scrapes metadata from a DMM URL. This provides more accurate results than ID-based search when the exact URL is known.
func (*Scraper) Search ¶
func (s *Scraper) Search(id string) (*models.ScraperResult, error)
Search searches for and scrapes metadata for a given movie ID.
func (*Scraper) ValidateConfig ¶
func (s *Scraper) ValidateConfig(cfg *config.ScraperSettings) error
ValidateConfig validates the scraper configuration. Returns error if config is invalid, nil if valid.