dmm

package
v0.2.9-alpha Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 22, 2026 License: MIT Imports: 24 Imported by: 0

Documentation

Index

Constants

View Source
const (
	ConfigKeyPlaceholderThreshold   = placeholder.ConfigKeyThreshold
	ConfigKeyExtraPlaceholderHashes = placeholder.ConfigKeyHashes
)
View Source
const DefaultPlaceholderThresholdKB = placeholder.DefaultThresholdKB

Variables

View Source
var DefaultPlaceholderHashes = placeholder.DefaultDMMPlaceholderHashes

Functions

func FetchWithBrowser

func FetchWithBrowser(parentCtx context.Context, url string, timeout int, proxyProfile *config.ProxyProfile) (string, error)

FetchWithBrowser fetches a URL using Chrome browser automation with age verification cookies

func GetExtraPlaceholderHashes

func GetExtraPlaceholderHashes(settings *config.ScraperSettings) []string

func GetPlaceholderThreshold

func GetPlaceholderThreshold(settings *config.ScraperSettings) int

func IsPlaceholder

func IsPlaceholder(ctx context.Context, client *resty.Client, url string, thresholdBytes int64, hashes []string) (bool, error)

func MergePlaceholderHashes

func MergePlaceholderHashes(settings *config.ScraperSettings) []string

func NewHTTPClient

func NewHTTPClient(cfg *config.ScraperSettings, globalProxy *config.ProxyConfig, globalFlareSolverr config.FlareSolverrConfig) (*resty.Client, *config.ProxyProfile, error)

NewHTTPClient creates an HTTP client for the DMM scraper. HTTP-01: Per-scraper HTTP client ownership. Returns client, effective proxyProfile (for browser use), and error.

Types

type DMMConfig

type DMMConfig struct {
	Enabled       bool                `yaml:"enabled" json:"enabled"`
	RequestDelay  int                 `yaml:"request_delay" json:"request_delay"`
	MaxRetries    int                 `yaml:"max_retries" json:"max_retries"`
	UserAgent     string              `yaml:"user_agent" json:"user_agent"`
	Proxy         *config.ProxyConfig `yaml:"proxy,omitempty" json:"proxy,omitempty"`
	DownloadProxy *config.ProxyConfig `yaml:"download_proxy,omitempty" json:"download_proxy,omitempty"`
	Priority      int                 `yaml:"priority" json:"priority"` // Scraper's priority (higher = higher priority)
	// Per-scraper browser and scrape_actress settings
	UseBrowser    bool `yaml:"use_browser" json:"use_browser"`
	ScrapeActress bool `yaml:"scrape_actress" json:"scrape_actress"`
	// Placeholder detection settings
	PlaceholderThresholdKB int      `yaml:"placeholder_threshold" json:"placeholder_threshold"`
	ExtraPlaceholderHashes []string `yaml:"extra_placeholder_hashes" json:"extra_placeholder_hashes"`
}

DMMConfig holds DMM/Fanza scraper configuration. YAML tags are defined here for unmarshaling via config.ScrapersConfig.

func (*DMMConfig) GetDownloadProxy

func (c *DMMConfig) GetDownloadProxy() any

GetDownloadProxy implements scraperutil.ScraperConfigInterface.

func (*DMMConfig) GetMaxRetries

func (c *DMMConfig) GetMaxRetries() int

GetMaxRetries implements scraperutil.ScraperConfigInterface.

func (*DMMConfig) GetProxy

func (c *DMMConfig) GetProxy() any

GetProxy implements scraperutil.ScraperConfigInterface.

func (*DMMConfig) GetRequestDelay

func (c *DMMConfig) GetRequestDelay() int

GetRequestDelay implements scraperutil.ScraperConfigInterface.

func (*DMMConfig) GetUserAgent

func (c *DMMConfig) GetUserAgent() string

GetUserAgent implements scraperutil.ScraperConfigInterface.

func (*DMMConfig) IsEnabled

func (c *DMMConfig) IsEnabled() bool

IsEnabled implements scraperutil.ScraperConfigInterface.

func (*DMMConfig) ToScraperSettings

func (c *DMMConfig) ToScraperSettings() *config.ScraperSettings

ToScraperSettings converts DMMConfig to ScraperSettings, flowing placeholder settings to Extra map for runtime access by placeholder detection functions.

func (*DMMConfig) ValidateConfig

func (c *DMMConfig) ValidateConfig(sc *config.ScraperSettings) error

ValidateConfig implements config.ConfigValidator for DMMConfig.

type JSONLDAggregateRating

type JSONLDAggregateRating struct {
	Type        string  `json:"@type"`
	RatingValue float64 `json:"ratingValue"`
	RatingCount int     `json:"ratingCount"`
}

JSONLDAggregateRating represents the AggregateRating schema

type JSONLDBrand

type JSONLDBrand struct {
	Type string `json:"@type"`
	Name string `json:"name"`
}

JSONLDBrand represents the Brand schema

type JSONLDOffer

type JSONLDOffer struct {
	Type          string  `json:"@type"`
	Availability  string  `json:"availability"`
	PriceCurrency string  `json:"priceCurrency"`
	Price         float64 `json:"price"`
}

JSONLDOffer represents the Offer schema

type JSONLDProduct

type JSONLDProduct struct {
	Context         string                 `json:"@context"`
	Type            string                 `json:"@type"`
	Name            string                 `json:"name"`
	Description     string                 `json:"description"`
	Image           interface{}            `json:"image"` // Can be string or array
	SKU             string                 `json:"sku"`
	Brand           *JSONLDBrand           `json:"brand"`
	SubjectOf       *JSONLDVideoObject     `json:"subjectOf"`
	Offers          *JSONLDOffer           `json:"offers"`
	AggregateRating *JSONLDAggregateRating `json:"aggregateRating"`
}

JSONLDProduct represents the Product schema from JSON-LD

type JSONLDVideoObject

type JSONLDVideoObject struct {
	Type         string   `json:"@type"`
	Name         string   `json:"name"`
	Description  string   `json:"description"`
	ContentURL   string   `json:"contentUrl"`
	ThumbnailURL string   `json:"thumbnailUrl"`
	UploadDate   string   `json:"uploadDate"`
	Genre        []string `json:"genre"`
}

JSONLDVideoObject represents the VideoObject schema

type Scraper

type Scraper struct {
	// contains filtered or unexported fields
}

Scraper implements the DMM/Fanza scraper

func New

func New(settings config.ScraperSettings, globalConfig *config.ScrapersConfig, contentIDRepo *database.ContentIDMappingRepository) *Scraper

func (*Scraper) CanHandleURL

func (s *Scraper) CanHandleURL(rawURL string) bool

CanHandleURL returns true if this scraper can handle the given URL

func (*Scraper) Close

func (s *Scraper) Close() error

Close cleans up resources held by the scraper

func (*Scraper) Config

func (s *Scraper) Config() *config.ScraperSettings

Config returns the scraper's configuration

func (*Scraper) ExtractIDFromURL

func (s *Scraper) ExtractIDFromURL(urlStr string) (string, error)

ExtractIDFromURL extracts the movie ID from a DMM URL

func (*Scraper) GetURL

func (s *Scraper) GetURL(id string) (string, error)

GetURL attempts to find the URL for a given movie ID using DMM search

func (*Scraper) GetURLCtx

func (s *Scraper) GetURLCtx(ctx context.Context, id string) (string, error)

GetURLCtx attempts to find the URL for a given movie ID using DMM search with context support

func (*Scraper) IsEnabled

func (s *Scraper) IsEnabled() bool

IsEnabled returns whether the scraper is enabled

func (*Scraper) Name

func (s *Scraper) Name() string

Name returns the scraper identifier

func (*Scraper) ResolveContentID

func (s *Scraper) ResolveContentID(id string) (string, error)

func (*Scraper) ResolveContentIDCtx

func (s *Scraper) ResolveContentIDCtx(ctx context.Context, id string) (string, error)

ResolveContentIDCtx attempts to resolve the display ID to an actual DMM content ID by first checking the cache, then scraping DMM search if needed

func (*Scraper) ResolveDownloadProxyForHost

func (s *Scraper) ResolveDownloadProxyForHost(host string) (*config.ProxyConfig, *config.ProxyConfig, bool)

ResolveDownloadProxyForHost declares DMM-owned media hosts for downloader proxy routing.

func (*Scraper) ScrapeURL

func (s *Scraper) ScrapeURL(ctx context.Context, url string) (*models.ScraperResult, error)

ScrapeURL directly scrapes metadata from a DMM URL. This provides more accurate results than ID-based search when the exact URL is known.

func (*Scraper) Search

func (s *Scraper) Search(ctx context.Context, id string) (*models.ScraperResult, error)

Search searches for and scrapes metadata for a given movie ID

func (*Scraper) ValidateConfig

func (s *Scraper) ValidateConfig(cfg *config.ScraperSettings) error

ValidateConfig validates the scraper configuration. Returns error if config is invalid, nil if valid.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL