Documentation
¶
Index ¶
- Constants
- func FormatActressName(lastName, firstName, japaneseName string) string
- func IsCloudflareChallengePage(body string) bool
- func ResolveSearchQueryForScraper(scraper Scraper, input string) (string, bool)
- type Actress
- type ActressAlias
- type ActressInfo
- type BatchFileOperation
- type ContentIDMapping
- type ContentIDResolver
- type DirectURLScraper
- type Event
- type Genre
- type GenreReplacement
- type History
- type Job
- type JobStatus
- type Movie
- type MovieTag
- type MovieTranslation
- type Rating
- type Scraper
- type ScraperChoice
- type ScraperDownloadProxyResolver
- type ScraperError
- type ScraperErrorKind
- type ScraperHTTPError
- type ScraperOption
- type ScraperQueryResolver
- type ScraperRegistry
- func (r *ScraperRegistry) Get(name string) (Scraper, bool)
- func (r *ScraperRegistry) GetAll() []Scraper
- func (r *ScraperRegistry) GetByPriority(priority []string) []Scraper
- func (r *ScraperRegistry) GetByPriorityForInput(priority []string, input string) []Scraper
- func (r *ScraperRegistry) GetEnabled() []Scraper
- func (r *ScraperRegistry) Register(scraper Scraper)
- func (r *ScraperRegistry) Reset()
- type ScraperResult
- type URLHandler
Constants ¶
const ( RevertStatusApplied = "applied" // Renamed from "pending" — D-01 RevertStatusReverted = "reverted" RevertStatusFailed = "failed" )
Revert status constants for BatchFileOperation
const ( RevertOutcomeReverted = "reverted" // Successfully reverted RevertOutcomeSkipped = "skipped" // Skipped (e.g., anchor missing) RevertOutcomeFailed = "failed" // Failed to revert )
RevertOutcome constants for per-operation result tracking — D-06
const ( RevertReasonAnchorMissing = "anchor_missing" // Video file missing at expected path RevertReasonDestinationConflict = "destination_conflict" // Original path already occupied RevertReasonAccessDenied = "access_denied" // Permission error during revert RevertReasonUnexpectedPathState = "unexpected_path_state" // File in unexpected state RevertReasonNFORestoreFailed = "nfo_restore_failed" // NFO write failed RevertReasonGeneratedCleanupFailed = "generated_cleanup_failed" // Generated file cleanup failed )
RevertReason constants for why a revert had a specific outcome — D-06
const ( OperationTypeMove = "move" OperationTypeCopy = "copy" OperationTypeHardlink = "hardlink" OperationTypeSymlink = "symlink" OperationTypeUpdate = "update" // update-mode organize (NFO overwrite, no file move) per HIST-05 )
Operation type constants for BatchFileOperation
const ( EventCategoryScraper = "scraper" EventCategoryOrganize = "organize" EventCategorySystem = "system" )
Event category constants for the event_type field
const ( SeverityDebug = "debug" SeverityInfo = "info" SeverityWarn = "warn" SeverityError = "error" )
Severity constants matching slog/Logrus levels (per D-08)
Variables ¶
This section is empty.
Functions ¶
func FormatActressName ¶
FormatActressName builds a display name from actress name components
func IsCloudflareChallengePage ¶
IsCloudflareChallengePage detects Cloudflare anti-bot/interstitial challenge pages from HTML content. These pages are often returned with HTTP 200.
Types ¶
type Actress ¶
type Actress struct {
ID uint `json:"id" gorm:"primaryKey"`
DMMID int `json:"dmm_id"` // Real DMM actress ID when available (unique only for values > 0)
FirstName string `json:"first_name"`
LastName string `json:"last_name"`
JapaneseName string `json:"japanese_name" gorm:"index"`
ThumbURL string `json:"thumb_url"`
Aliases string `json:"aliases"` // Pipe-separated
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
}
Actress represents a JAV actress
type ActressAlias ¶
type ActressAlias struct {
ID uint `json:"id" gorm:"primaryKey"`
AliasName string `json:"alias_name" gorm:"uniqueIndex;not null"` // The alternate name (e.g., "Yui Hatano")
CanonicalName string `json:"canonical_name" gorm:"index;not null"` // The canonical/preferred name (e.g., "Hatano Yui")
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
}
ActressAlias represents an alternate name mapping for an actress. This allows users to consolidate multiple actress names into a canonical one.
type ActressInfo ¶
type ActressInfo struct {
DMMID int `json:"dmm_id"` // DMM actress ID for unique identification
FirstName string `json:"first_name"`
LastName string `json:"last_name"`
JapaneseName string `json:"japanese_name"`
ThumbURL string `json:"thumb_url"`
}
ActressInfo represents actress information from a scraper
func (*ActressInfo) FullName ¶
func (a *ActressInfo) FullName() string
FullName returns the actress's full name
type BatchFileOperation ¶
type BatchFileOperation struct {
ID uint `json:"id" gorm:"primaryKey"`
BatchJobID string `json:"batch_job_id" gorm:"not null;index:idx_bfo_batch_job_id;index:idx_bfo_batch_job_revert_status,priority:1"`
MovieID string `json:"movie_id"`
OriginalPath string `json:"original_path" gorm:"not null"`
NewPath string `json:"new_path" gorm:"not null"`
OperationType string `json:"operation_type" gorm:"not null;default:move"`
NFOSnapshot string `json:"nfo_snapshot" gorm:"type:text"`
NFOPath string `json:"nfo_path" gorm:"type:text"`
GeneratedFiles string `json:"generated_files" gorm:"type:text"`
RevertStatus string `json:"revert_status" gorm:"not null;default:applied;index:idx_bfo_batch_job_revert_status,priority:2"`
RevertedAt *time.Time `json:"reverted_at"`
InPlaceRenamed bool `json:"in_place_renamed" gorm:"not null;default:false"`
OriginalDirPath string `json:"original_dir_path"`
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
}
BatchFileOperation represents per-file organize details for revert support
func (BatchFileOperation) TableName ¶
func (BatchFileOperation) TableName() string
TableName specifies the table name for BatchFileOperation
type ContentIDMapping ¶
type ContentIDMapping struct {
ID uint `gorm:"primarykey" json:"id"`
SearchID string `gorm:"uniqueIndex;not null" json:"search_id"` // e.g., "MDB-087"
ContentID string `gorm:"not null" json:"content_id"` // e.g., "61mdb087"
Source string `gorm:"not null" json:"source"` // e.g., "dmm"
CreatedAt time.Time `json:"created_at"`
}
ContentIDMapping stores the mapping between search IDs and actual DMM content IDs. This is used to cache the resolution of display IDs (like "MDB-087") to actual content IDs (like "61mdb087") that DMM uses internally.
type ContentIDResolver ¶
ContentIDResolver is an optional interface for scrapers that can resolve a JAV ID to its DMM content-ID format (e.g., "ipx-123" -> "118BDP-00118").
This is primarily used by DMM to normalize IDs before querying other scrapers, since many scrapers share the same DMM content-ID format.
Implementations should return (resolvedID, nil) on success or ("", error) on failure. If a scraper does not support content-ID resolution for the given input, it should return the input unchanged with a nil error.
type DirectURLScraper ¶
type DirectURLScraper interface {
// ScrapeURL directly scrapes metadata from a URL.
// Returns ScraperResult on success, or error with typed ScraperError on failure.
// Implementations should return ScraperErrorKindNotFound for non-existent pages.
// Context enables cancellation and timeout propagation through rate limiters and HTTP requests.
ScrapeURL(ctx context.Context, url string) (*ScraperResult, error)
}
DirectURLScraper is an optional interface for scrapers that can directly scrape URLs. Scrapers implementing this interface can extract more accurate metadata from direct URLs than from ID-based search results.
type Event ¶
type Event struct {
ID uint `json:"id" gorm:"primaryKey"`
EventType string `json:"event_type" gorm:"not null;index:idx_events_type;index:idx_events_type_severity;index:idx_events_type_source"`
Severity string `json:"severity" gorm:"not null;index:idx_events_severity;index:idx_events_type_severity"`
Message string `json:"message" gorm:"not null;type:text"`
Context string `json:"context" gorm:"type:text"` // JSON-encoded details
Source string `json:"source" gorm:"index:idx_events_source;index:idx_events_type_source"`
CreatedAt time.Time `json:"created_at" gorm:"not null;index:idx_events_created_at"`
}
Event represents a structured event log entry for debugging and bug reporting
type Genre ¶
type Genre struct {
ID uint `json:"id" gorm:"primaryKey"`
Name string `json:"name" gorm:"uniqueIndex"`
}
Genre represents a category/tag
type GenreReplacement ¶
type GenreReplacement struct {
ID uint `json:"id" gorm:"primaryKey"`
Original string `json:"original" gorm:"uniqueIndex;not null"`
Replacement string `json:"replacement" gorm:"not null"`
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
}
GenreReplacement represents a user-defined genre name mapping
type History ¶
type History struct {
ID uint `json:"id" gorm:"primaryKey"`
MovieID string `json:"movie_id" gorm:"index"` // Foreign key to movies.content_id (nullable for historical records)
BatchJobID *string `json:"batch_job_id" gorm:"index"` // Foreign key to jobs.id (nullable: historical records have no batch job)
Operation string `json:"operation"` // "scrape", "organize", "download", "nfo"
OriginalPath string `json:"original_path"` // Source file path
NewPath string `json:"new_path"` // Destination file path
Status string `json:"status"` // "success", "failed", "reverted"
ErrorMessage string `json:"error_message" gorm:"type:text"` // Error details if failed
Metadata string `json:"metadata" gorm:"type:json"` // Additional metadata (JSON)
DryRun bool `json:"dry_run"` // Whether this was a dry run
CreatedAt time.Time `json:"created_at" gorm:"index"`
}
History represents a log of file organization operations
type Job ¶
type Job struct {
ID string `json:"id" gorm:"primaryKey"`
Status string `json:"status" gorm:"index"`
TotalFiles int `json:"total_files"`
Completed int `json:"completed"`
Failed int `json:"failed"`
Progress float64 `json:"progress"`
Destination string `json:"destination"`
TempDir string `json:"temp_dir" gorm:"default:'data/temp'"`
Files string `json:"files" gorm:"type:text"`
Results string `json:"results" gorm:"type:text"`
Excluded string `json:"excluded" gorm:"type:text"`
FileMatchInfo string `json:"file_match_info" gorm:"type:text"`
StartedAt time.Time `json:"started_at" gorm:"index"`
CompletedAt *time.Time `json:"completed_at"`
OrganizedAt *time.Time `json:"organized_at"`
RevertedAt *time.Time `json:"reverted_at"`
}
type Movie ¶
type Movie struct {
ContentID string `json:"content_id" gorm:"primaryKey"`
ID string `json:"id" gorm:"index"`
DisplayTitle string `json:"display_title"`
Title string `json:"title"`
OriginalTitle string `json:"original_title"` // Japanese/original language title
Description string `json:"description" gorm:"type:text"`
ReleaseDate *time.Time `json:"release_date"`
ReleaseYear int `json:"release_year"`
Runtime int `json:"runtime"` // in minutes
Director string `json:"director"`
Maker string `json:"maker"` // Studio/maker
Label string `json:"label"` // Sub-label
Series string `json:"series"` // Series name
RatingScore float64 `json:"rating_score" gorm:"column:rating_score"`
RatingVotes int `json:"rating_votes" gorm:"column:rating_votes"`
PosterURL string `json:"poster_url"` // Portrait/box art image
CoverURL string `json:"cover_url"` // Landscape/fanart image
CroppedPosterURL string `json:"cropped_poster_url"` // URL to the cropped poster (persisted)
ShouldCropPoster bool `json:"should_crop_poster"` // Whether poster needs cropping from cover
TrailerURL string `json:"trailer_url"`
OriginalFileName string `json:"original_filename"`
// Relationships
Actresses []Actress `` /* 139-byte string literal not displayed */
Genres []Genre `` /* 131-byte string literal not displayed */
Screenshots []string `json:"screenshot_urls" gorm:"serializer:json"`
// Translations
Translations []MovieTranslation `json:"translations" gorm:"foreignKey:MovieID;references:ContentID"`
// Metadata
SourceName string `json:"source_name"` // Primary source
SourceURL string `json:"source_url"`
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
}
Movie represents the aggregated metadata for a JAV movie
type MovieTag ¶
type MovieTag struct {
ID uint `json:"id" gorm:"primaryKey"`
MovieID string `json:"movie_id" gorm:"index:idx_movie_tag,unique;not null;size:50"` // Foreign key to movies.content_id (CASCADE handled in Delete)
Tag string `json:"tag" gorm:"index:idx_movie_tag,unique;not null;size:100"` // Tag name (case-sensitive)
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
}
MovieTag represents a custom user-defined tag for a specific movie. Tags are used for personal organization and appear in NFO files.
type MovieTranslation ¶
type MovieTranslation struct {
ID uint `json:"id" gorm:"primaryKey"`
MovieID string `json:"movie_id" gorm:"index:idx_movie_language,unique"`
Language string `json:"language" gorm:"index:idx_movie_language,unique;size:5"` // ISO 639-1: en, ja, zh, etc.
Title string `json:"title"`
OriginalTitle string `json:"original_title"` // Japanese/original language title
Description string `json:"description" gorm:"type:text"`
Director string `json:"director"`
Maker string `json:"maker"`
Label string `json:"label"`
Series string `json:"series"`
SourceName string `json:"source_name"` // Which scraper provided this translation
SettingsHash string `gorm:"type:varchar(16)" json:"settings_hash"` // Hash of translation settings used
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
}
MovieTranslation represents a movie's metadata in a specific language
func (MovieTranslation) TableName ¶
func (MovieTranslation) TableName() string
TableName specifies the table name for MovieTranslation
type Scraper ¶
type Scraper interface {
// Name returns the scraper's identifier (e.g., "r18dev", "dmm")
Name() string
// Search attempts to find and scrape metadata for the given movie ID.
// Context enables cancellation and timeout propagation through rate limiters and HTTP requests.
Search(ctx context.Context, id string) (*ScraperResult, error)
// GetURL attempts to find the URL for a given movie ID
GetURL(id string) (string, error)
// IsEnabled returns whether this scraper is enabled in configuration
IsEnabled() bool
// Config returns the scraper's configuration
Config() *config.ScraperSettings
// Close cleans up resources held by the scraper (e.g., HTTP clients, browsers)
// Returns nil if no cleanup is needed
Close() error
}
Scraper defines the interface that all scrapers must implement
type ScraperChoice ¶
type ScraperChoice struct {
Value string `json:"value" example:"en"`
Label string `json:"label" example:"English"`
}
ScraperChoice represents a choice for a select-type scraper option
type ScraperDownloadProxyResolver ¶
type ScraperDownloadProxyResolver interface {
ResolveDownloadProxyForHost(host string) (downloadOverride *config.ProxyConfig, scraperProxy *config.ProxyConfig, handled bool)
}
ScraperDownloadProxyResolver is an optional hook for scrapers to control media download proxy routing for scraper-specific media/CDN hosts.
Implementations should return handled=false for unrelated hosts. When handled=true, downloader applies the same proxy precedence rules used by scraper download_proxy/proxy/global settings.
type ScraperError ¶
type ScraperError struct {
Scraper string
Kind ScraperErrorKind
StatusCode int
Message string
Temporary bool
Retryable bool
Cause error
}
ScraperError is a typed scraper failure that worker/UI layers can classify without brittle string parsing.
func AsScraperError ¶
func AsScraperError(err error) (*ScraperError, bool)
AsScraperError extracts a ScraperError from any wrapped error chain.
func NewScraperChallengeError ¶
func NewScraperChallengeError(scraper, message string) *ScraperError
NewScraperChallengeError builds a typed blocked error for anti-bot challenge pages (for example Cloudflare challenge interstitials served with HTTP 200).
func NewScraperNotFoundError ¶
func NewScraperNotFoundError(scraper, message string) *ScraperError
NewScraperNotFoundError builds a typed "not found" scraper error.
func NewScraperStatusError ¶
func NewScraperStatusError(scraper string, statusCode int, message string) *ScraperError
NewScraperStatusError builds a typed scraper error from an HTTP status code.
func (*ScraperError) Error ¶
func (e *ScraperError) Error() string
func (*ScraperError) Unwrap ¶
func (e *ScraperError) Unwrap() error
type ScraperErrorKind ¶
type ScraperErrorKind string
ScraperErrorKind classifies scraper failures in a structured way.
const ( ScraperErrorKindUnknown ScraperErrorKind = "unknown" ScraperErrorKindNotFound ScraperErrorKind = "not_found" ScraperErrorKindRateLimited ScraperErrorKind = "rate_limited" ScraperErrorKindBlocked ScraperErrorKind = "blocked" )
type ScraperHTTPError ¶
func NewScraperHTTPError ¶
func NewScraperHTTPError(scraper string, statusCode int, message string) *ScraperHTTPError
func (*ScraperHTTPError) Error ¶
func (e *ScraperHTTPError) Error() string
type ScraperOption ¶
type ScraperOption struct {
Key string `json:"key" example:"scrape_actress"`
Label string `json:"label" example:"Scrape Actress Information"`
Description string `json:"description" example:"Enable detailed actress data scraping from DMM (may be slower)"`
Type string `json:"type" example:"boolean"`
Default interface{} `json:"default,omitempty"`
Min *int `json:"min,omitempty" example:"5"`
Max *int `json:"max,omitempty" example:"120"`
Unit string `json:"unit,omitempty" example:"seconds"`
Choices []ScraperChoice `json:"choices,omitempty"`
}
ScraperOption represents a configurable option for a scraper
type ScraperQueryResolver ¶
ScraperQueryResolver is an optional hook for scrapers to declare and normalize identifier formats they can handle (e.g., non-standard filename IDs).
Implementations should return (normalizedQuery, true) when input matches a scraper-specific pattern, or ("", false) when it does not apply.
type ScraperRegistry ¶
type ScraperRegistry struct {
// contains filtered or unexported fields
}
ScraperRegistry manages available scrapers
func NewScraperRegistry ¶
func NewScraperRegistry() *ScraperRegistry
NewScraperRegistry creates a new scraper registry
func (*ScraperRegistry) Get ¶
func (r *ScraperRegistry) Get(name string) (Scraper, bool)
Get retrieves a scraper by name
func (*ScraperRegistry) GetAll ¶
func (r *ScraperRegistry) GetAll() []Scraper
GetAll returns all registered scrapers in sorted key order for deterministic iteration
func (*ScraperRegistry) GetByPriority ¶
func (r *ScraperRegistry) GetByPriority(priority []string) []Scraper
GetByPriority returns enabled scrapers in the specified priority order. If the priority list is empty or nil, it returns all enabled scrapers. Only scrapers that are both in the priority list AND enabled are returned.
func (*ScraperRegistry) GetByPriorityForInput ¶
func (r *ScraperRegistry) GetByPriorityForInput(priority []string, input string) []Scraper
GetByPriorityForInput returns enabled scrapers in priority order, but moves scrapers with matching query resolvers to the front for the provided input.
If no scraper resolver matches, the original GetByPriority ordering is returned unchanged.
func (*ScraperRegistry) GetEnabled ¶
func (r *ScraperRegistry) GetEnabled() []Scraper
GetEnabled returns all enabled scrapers in sorted key order for deterministic iteration
func (*ScraperRegistry) Register ¶
func (r *ScraperRegistry) Register(scraper Scraper)
Register adds a scraper to the registry
func (*ScraperRegistry) Reset ¶
func (r *ScraperRegistry) Reset()
Reset clears all registered scrapers from the registry. Primarily used for test isolation.
type ScraperResult ¶
type ScraperResult struct {
Source string `json:"source"`
SourceURL string `json:"source_url"`
Language string `json:"language"` // ISO 639-1 code: en, ja, zh, etc.
ID string `json:"id"`
ContentID string `json:"content_id"`
Title string `json:"title"`
OriginalTitle string `json:"original_title"` // Japanese/original language title
Description string `json:"description"`
ReleaseDate *time.Time `json:"release_date"`
Runtime int `json:"runtime"`
Director string `json:"director"`
Maker string `json:"maker"`
Label string `json:"label"`
Series string `json:"series"`
Rating *Rating `json:"rating"`
Actresses []ActressInfo `json:"actresses"`
Genres []string `json:"genres"`
PosterURL string `json:"poster_url"` // Portrait/box art image
CoverURL string `json:"cover_url"` // Landscape/fanart image
ShouldCropPoster bool `json:"should_crop_poster"` // Whether poster needs cropping from cover
ScreenshotURL []string `json:"screenshot_urls"`
TrailerURL string `json:"trailer_url"`
Translations []MovieTranslation `json:"translations,omitempty"` // Additional language translations (optional)
}
ScraperResult represents the raw data returned by a scraper
func (*ScraperResult) NormalizeMediaURLs ¶
func (r *ScraperResult) NormalizeMediaURLs()
NormalizeMediaURLs applies post-scrape media URL normalization hooks.
This currently upgrades DMM poster URLs ending in "ps.jpg" to "pl.jpg" to use higher-resolution assets when available.
type URLHandler ¶
type URLHandler interface {
// CanHandleURL returns true if this scraper can handle the given URL
CanHandleURL(url string) bool
// ExtractIDFromURL extracts the movie ID from a URL this scraper can handle
// Returns (id, nil) on success or ("", error) if extraction fails
ExtractIDFromURL(url string) (string, error)
}
URLHandler is an optional interface for scrapers that can handle direct URL scraping. Implementations should return true for URLs they can process and extract the movie ID.
This enables extensible URL detection - scrapers declare which URLs they handle instead of hardcoding patterns in the matcher.