dmm

package
v0.2.4-alpha Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 12, 2026 License: MIT Imports: 25 Imported by: 0

Documentation

Index

Constants

View Source
// Aliases for the placeholder package's config-key constants, so callers can
// refer to them through this package.
// NOTE(review): presumably these are the keys under which the placeholder
// settings are stored in ScraperSettings — confirm against the placeholder package.
const (
	ConfigKeyPlaceholderThreshold   = placeholder.ConfigKeyThreshold
	ConfigKeyExtraPlaceholderHashes = placeholder.ConfigKeyHashes
)
View Source
// DefaultPlaceholderThresholdKB is an alias for placeholder.DefaultThresholdKB.
// The name indicates the value is expressed in kilobytes.
const DefaultPlaceholderThresholdKB = placeholder.DefaultThresholdKB

Variables

View Source
// DefaultPlaceholderHashes is initialized from placeholder.DefaultDMMPlaceholderHashes.
// NOTE(review): this is a package-level var; if the underlying value is a
// slice, both names share one backing array — confirm callers do not mutate it.
var DefaultPlaceholderHashes = placeholder.DefaultDMMPlaceholderHashes

Functions

func FetchWithBrowser

func FetchWithBrowser(url string, timeout int, proxyProfile *config.ProxyProfile) (string, error)

FetchWithBrowser fetches a URL using Chrome browser automation with age-verification cookies set.

func GetExtraPlaceholderHashes

func GetExtraPlaceholderHashes(settings *config.ScraperSettings) []string

func GetPlaceholderThreshold

func GetPlaceholderThreshold(settings *config.ScraperSettings) int

func IsPlaceholder

func IsPlaceholder(ctx context.Context, client *resty.Client, url string, thresholdBytes int64, hashes []string) (bool, error)

func MergePlaceholderHashes

func MergePlaceholderHashes(settings *config.ScraperSettings) []string

func NewHTTPClient

func NewHTTPClient(cfg *config.ScraperSettings, globalProxy *config.ProxyConfig, globalFlareSolverr config.FlareSolverrConfig) (*resty.Client, *config.ProxyProfile, error)

NewHTTPClient creates an HTTP client for the DMM scraper (HTTP-01: per-scraper HTTP client ownership). It returns the client, the effective proxy profile (for browser use), and an error.

Types

type DMMConfig

// DMMConfig holds DMM/Fanza scraper configuration.
//
// Every field is tagged with the same key name for both YAML and JSON. The
// two proxy pointers are tagged omitempty, so they are left out of the
// serialized output when nil.
type DMMConfig struct {
	// General scraper settings and optional proxy configuration.
	// NOTE(review): RequestDelay carries no unit in its name or tag —
	// confirm the unit against the code that consumes it.
	Enabled       bool                `yaml:"enabled" json:"enabled"`
	RequestDelay  int                 `yaml:"request_delay" json:"request_delay"`
	MaxRetries    int                 `yaml:"max_retries" json:"max_retries"`
	UserAgent     string              `yaml:"user_agent" json:"user_agent"`
	Proxy         *config.ProxyConfig `yaml:"proxy,omitempty" json:"proxy,omitempty"`
	DownloadProxy *config.ProxyConfig `yaml:"download_proxy,omitempty" json:"download_proxy,omitempty"`
	Priority      int                 `yaml:"priority" json:"priority"` // Scraper's priority (higher = higher priority)
	// Per-scraper browser and scrape_actress settings
	UseBrowser    bool `yaml:"use_browser" json:"use_browser"`
	ScrapeActress bool `yaml:"scrape_actress" json:"scrape_actress"`
	// Placeholder detection settings
	// The threshold is serialized as "placeholder_threshold" (no KB suffix in
	// the key); the Go field name indicates kilobytes.
	PlaceholderThresholdKB int      `yaml:"placeholder_threshold" json:"placeholder_threshold"`
	ExtraPlaceholderHashes []string `yaml:"extra_placeholder_hashes" json:"extra_placeholder_hashes"`
}

DMMConfig holds DMM/Fanza scraper configuration. YAML tags are defined here for unmarshaling via config.ScrapersConfig.

func (*DMMConfig) GetDownloadProxy

func (c *DMMConfig) GetDownloadProxy() any

GetDownloadProxy implements scraperutil.ScraperConfigInterface.

func (*DMMConfig) GetMaxRetries

func (c *DMMConfig) GetMaxRetries() int

GetMaxRetries implements scraperutil.ScraperConfigInterface.

func (*DMMConfig) GetProxy

func (c *DMMConfig) GetProxy() any

GetProxy implements scraperutil.ScraperConfigInterface.

func (*DMMConfig) GetRequestDelay

func (c *DMMConfig) GetRequestDelay() int

GetRequestDelay implements scraperutil.ScraperConfigInterface.

func (*DMMConfig) GetUserAgent

func (c *DMMConfig) GetUserAgent() string

GetUserAgent implements scraperutil.ScraperConfigInterface.

func (*DMMConfig) IsEnabled

func (c *DMMConfig) IsEnabled() bool

IsEnabled implements scraperutil.ScraperConfigInterface.

func (*DMMConfig) ToScraperSettings

func (c *DMMConfig) ToScraperSettings() *config.ScraperSettings

ToScraperSettings converts DMMConfig to ScraperSettings, copying the placeholder settings into the Extra map so that the placeholder detection functions can read them at runtime.

func (*DMMConfig) ValidateConfig

func (c *DMMConfig) ValidateConfig(sc *config.ScraperSettings) error

ValidateConfig implements config.ConfigValidator for DMMConfig.

type JSONLDAggregateRating

// JSONLDAggregateRating represents the AggregateRating schema.
//
// Fields map to the JSON keys "@type", "ratingValue", and "ratingCount".
type JSONLDAggregateRating struct {
	Type        string  `json:"@type"`
	RatingValue float64 `json:"ratingValue"`
	RatingCount int     `json:"ratingCount"`
}

JSONLDAggregateRating represents the AggregateRating schema

type JSONLDBrand

// JSONLDBrand represents the Brand schema.
//
// Fields map to the JSON keys "@type" and "name".
type JSONLDBrand struct {
	Type string `json:"@type"`
	Name string `json:"name"`
}

JSONLDBrand represents the Brand schema

type JSONLDOffer

// JSONLDOffer represents the Offer schema.
//
// Fields map to the JSON keys "@type", "availability", "priceCurrency", and
// "price". Price is a float64, so the JSON value must be a number.
// NOTE(review): if the source site ever emits price as a quoted string,
// decoding with encoding/json would fail — confirm against real payloads.
type JSONLDOffer struct {
	Type          string  `json:"@type"`
	Availability  string  `json:"availability"`
	PriceCurrency string  `json:"priceCurrency"`
	Price         float64 `json:"price"`
}

JSONLDOffer represents the Offer schema

type JSONLDProduct

// JSONLDProduct represents the Product schema from JSON-LD.
//
// The nested Brand, SubjectOf, Offers, and AggregateRating values are
// pointers, so they stay nil when the corresponding key is absent from the
// decoded document.
type JSONLDProduct struct {
	Context     string `json:"@context"`
	Type        string `json:"@type"`
	Name        string `json:"name"`
	Description string `json:"description"`
	// Image is left untyped because the source value can be a string or an
	// array; callers must type-switch on it. With encoding/json that means
	// string or []any.
	Image           any                    `json:"image"` // Can be string or array
	SKU             string                 `json:"sku"`
	Brand           *JSONLDBrand           `json:"brand"`
	SubjectOf       *JSONLDVideoObject     `json:"subjectOf"`
	Offers          *JSONLDOffer           `json:"offers"`
	AggregateRating *JSONLDAggregateRating `json:"aggregateRating"`
}

JSONLDProduct represents the Product schema from JSON-LD

type JSONLDVideoObject

// JSONLDVideoObject represents the VideoObject schema.
//
// Fields map to the JSON keys "@type", "name", "description", "contentUrl",
// "thumbnailUrl", "uploadDate", and "genre". UploadDate is kept as the raw
// string; no date parsing happens at this level.
type JSONLDVideoObject struct {
	Type         string   `json:"@type"`
	Name         string   `json:"name"`
	Description  string   `json:"description"`
	ContentURL   string   `json:"contentUrl"`
	ThumbnailURL string   `json:"thumbnailUrl"`
	UploadDate   string   `json:"uploadDate"`
	Genre        []string `json:"genre"`
}

JSONLDVideoObject represents the VideoObject schema

type Scraper

type Scraper struct {
	// contains filtered or unexported fields
}

Scraper implements the DMM/Fanza scraper

func New

func New(settings config.ScraperSettings, globalConfig *config.ScrapersConfig, contentIDRepo *database.ContentIDMappingRepository) *Scraper

func (*Scraper) CanHandleURL

func (s *Scraper) CanHandleURL(rawURL string) bool

CanHandleURL returns true if this scraper can handle the given URL

func (*Scraper) Close

func (s *Scraper) Close() error

Close cleans up resources held by the scraper

func (*Scraper) Config

func (s *Scraper) Config() *config.ScraperSettings

Config returns the scraper's configuration

func (*Scraper) ExtractIDFromURL

func (s *Scraper) ExtractIDFromURL(urlStr string) (string, error)

ExtractIDFromURL extracts the movie ID from a DMM URL

func (*Scraper) GetURL

func (s *Scraper) GetURL(id string) (string, error)

GetURL attempts to find the URL for a given movie ID using DMM search

func (*Scraper) IsEnabled

func (s *Scraper) IsEnabled() bool

IsEnabled returns whether the scraper is enabled

func (*Scraper) Name

func (s *Scraper) Name() string

Name returns the scraper identifier

func (*Scraper) ResolveContentID

func (s *Scraper) ResolveContentID(id string) (string, error)

ResolveContentID attempts to resolve the display ID to an actual DMM content ID, first checking the cache and then scraping DMM search if needed.

func (*Scraper) ResolveDownloadProxyForHost

func (s *Scraper) ResolveDownloadProxyForHost(host string) (*config.ProxyConfig, *config.ProxyConfig, bool)

ResolveDownloadProxyForHost declares DMM-owned media hosts for downloader proxy routing.

func (*Scraper) ScrapeURL

func (s *Scraper) ScrapeURL(url string) (*models.ScraperResult, error)

ScrapeURL directly scrapes metadata from a DMM URL. This provides more accurate results than ID-based search when the exact URL is known.

func (*Scraper) Search

func (s *Scraper) Search(id string) (*models.ScraperResult, error)

Search searches for and scrapes metadata for a given movie ID

func (*Scraper) ValidateConfig

func (s *Scraper) ValidateConfig(cfg *config.ScraperSettings) error

ValidateConfig validates the scraper configuration. Returns error if config is invalid, nil if valid.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL