extractor

package
v0.13.5 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 14, 2026 | License: Apache-2.0 | Imports: 25 | Imported by: 0

Documentation

Index

Constants

View Source
const (
	TelegramDesktopAppID   = telegram.DesktopAppID
	TelegramDesktopAppHash = telegram.DesktopAppHash
)

Re-export constants

View Source
const (
	TwitterErrorNSFW        = "nsfw"
	TwitterErrorProtected   = "protected"
	TwitterErrorUnavailable = "unavailable"
)

Error code constants

Variables

View Source
var (
	TelegramDownload            = telegram.Download
	TelegramDownloadWithOptions = telegram.DownloadWithOptions
	TelegramSessionPath         = telegram.SessionPath
	TelegramSessionExists       = telegram.SessionExists
)

Re-export telegram package functions for external use

Functions

func AVToBV added in v0.11.0

func AVToBV(avid int64) (string, error)

AVToBV converts an AV number to BV ID

func BVToAV added in v0.11.0

func BVToAV(bvid string) (int64, error)

BVToAV converts a BV ID to AV number

func DownloadWithYtdlp

func DownloadWithYtdlp(url, outputDir string) error

DownloadWithYtdlp downloads a YouTube video using yt-dlp directly

func DownloadWithYtdlpProgress

func DownloadWithYtdlpProgress(ctx context.Context, url, outputDir string, progressFn func(downloaded, total int64)) error

DownloadWithYtdlpProgress downloads a YouTube video using yt-dlp with progress callback

func NormalizeURL added in v0.11.6

func NormalizeURL(rawURL string) (string, error)

NormalizeURL adds https:// scheme if missing and validates the result

func Register

func Register(e Extractor, hosts ...string)

Register adds an extractor for the given hostnames

func RegisterFallback

func RegisterFallback(e Extractor)

RegisterFallback sets the fallback extractor for direct files and unknown hosts

func SanitizeFilename

func SanitizeFilename(name string) string

SanitizeFilename removes or replaces characters that are invalid in filenames

Types

type AudioMedia

type AudioMedia struct {
	ID       string
	Title    string
	Uploader string
	Duration int // seconds
	URL      string
	Ext      string // "mp3", "m4a", etc.
}

AudioMedia represents audio content (podcasts, music)

func (*AudioMedia) GetID

func (a *AudioMedia) GetID() string

func (*AudioMedia) GetTitle

func (a *AudioMedia) GetTitle() string

func (*AudioMedia) GetUploader

func (a *AudioMedia) GetUploader() string

func (*AudioMedia) Type

func (a *AudioMedia) Type() MediaType

type BilibiliExtractor added in v0.11.0

type BilibiliExtractor struct {
	// contains filtered or unexported fields
}

BilibiliExtractor handles Bilibili video extraction

func (*BilibiliExtractor) Extract added in v0.11.0

func (b *BilibiliExtractor) Extract(urlStr string) (Media, error)

Extract retrieves video information from a Bilibili URL

func (*BilibiliExtractor) Match added in v0.11.0

func (b *BilibiliExtractor) Match(u *url.URL) bool

Match checks if URL is a Bilibili video URL

func (*BilibiliExtractor) Name added in v0.11.0

func (b *BilibiliExtractor) Name() string

Name returns the extractor name

type BilibiliStreamInfo added in v0.11.0

type BilibiliStreamInfo struct {
	Videos []struct {
		ID        int      `json:"id"`
		BaseURL   string   `json:"baseUrl"`
		BackupURL []string `json:"backupUrl"`
		Bandwidth int64    `json:"bandwidth"`
		Width     int      `json:"width"`
		Height    int      `json:"height"`
		Codecs    string   `json:"codecs"`
		CodecID   int      `json:"codecid"`
	} `json:"video"`
	Audios []struct {
		ID        int      `json:"id"`
		BaseURL   string   `json:"baseUrl"`
		BackupURL []string `json:"backupUrl"`
		Bandwidth int64    `json:"bandwidth"`
		Codecs    string   `json:"codecs"`
	} `json:"audio"`
}

BilibiliStreamInfo represents stream data

type BilibiliVideoInfo added in v0.11.0

type BilibiliVideoInfo struct {
	Title    string `json:"title"`
	Desc     string `json:"desc"`
	Pic      string `json:"pic"`
	Duration int    `json:"duration"`
	Owner    struct {
		Mid  int64  `json:"mid"`
		Name string `json:"name"`
	} `json:"owner"`
	Pages []struct {
		CID      int64  `json:"cid"`
		Page     int    `json:"page"`
		Part     string `json:"part"`
		Duration int    `json:"duration"`
	} `json:"pages"`
}

BilibiliVideoInfo represents video metadata

type BrowserExtractor

type BrowserExtractor struct {
	// contains filtered or unexported fields
}

BrowserExtractor uses browser automation to intercept media URLs

func NewBrowserExtractor

func NewBrowserExtractor(site *config.Site, visible bool) *BrowserExtractor

NewBrowserExtractor creates a new browser extractor for the given site

func NewGenericBrowserExtractor

func NewGenericBrowserExtractor(visible bool) *BrowserExtractor

NewGenericBrowserExtractor creates a browser extractor for unknown sites (defaults to m3u8)

func (*BrowserExtractor) Extract

func (e *BrowserExtractor) Extract(rawURL string) (Media, error)

func (*BrowserExtractor) Match

func (e *BrowserExtractor) Match(u *url.URL) bool

func (*BrowserExtractor) Name

func (e *BrowserExtractor) Name() string

type DirectExtractor

type DirectExtractor struct {
	// contains filtered or unexported fields
}

DirectExtractor handles direct file URLs (mp4, mp3, jpg, etc.). This is a fallback extractor that matches any URL not handled by others.

func (*DirectExtractor) Extract

func (d *DirectExtractor) Extract(urlStr string) (Media, error)

Extract retrieves media information from a direct URL

func (*DirectExtractor) Match

func (d *DirectExtractor) Match(u *url.URL) bool

Match always returns true - this is the fallback extractor

func (*DirectExtractor) Name

func (d *DirectExtractor) Name() string

Name returns the extractor name

type Extractor

type Extractor interface {
	// Name returns the extractor name (e.g., "twitter", "direct")
	Name() string

	// Match returns true if this extractor can handle the URL
	// The URL is pre-parsed so extractors can reliably check the host/domain
	Match(u *url.URL) bool

	// Extract retrieves media information from the URL
	Extract(url string) (Media, error)
}

Extractor defines the interface for media extractors

func List

func List() []Extractor

List returns all unique registered extractors

func Match

func Match(rawURL string) Extractor

Match finds the extractor for a URL using O(1) hostname lookup. Returns nil for unknown hosts (caller should check sites.yml).

type Image

type Image struct {
	URL    string
	Ext    string // "jpg", "png", "webp"
	Width  int
	Height int
}

Image represents a single image to download

type ImageMedia

type ImageMedia struct {
	ID       string
	Title    string
	Uploader string
	Images   []Image
}

ImageMedia represents one or more images from a single source

func (*ImageMedia) GetID

func (i *ImageMedia) GetID() string

func (*ImageMedia) GetTitle

func (i *ImageMedia) GetTitle() string

func (*ImageMedia) GetUploader

func (i *ImageMedia) GetUploader() string

func (*ImageMedia) Type

func (i *ImageMedia) Type() MediaType

type InstagramExtractor

type InstagramExtractor struct{}

InstagramExtractor handles Instagram video downloads

func (*InstagramExtractor) Extract

func (e *InstagramExtractor) Extract(url string) (Media, error)

func (*InstagramExtractor) Match

func (e *InstagramExtractor) Match(u *url.URL) bool

func (*InstagramExtractor) Name

func (e *InstagramExtractor) Name() string

type M3U8Extractor

type M3U8Extractor struct {
	// contains filtered or unexported fields
}

M3U8Extractor handles direct m3u8 playlist URLs

func (*M3U8Extractor) Extract

func (m *M3U8Extractor) Extract(urlStr string) (Media, error)

Extract retrieves media information from an m3u8 URL

func (*M3U8Extractor) Match

func (m *M3U8Extractor) Match(u *url.URL) bool

Match checks if the URL is an m3u8 playlist

func (*M3U8Extractor) Name

func (m *M3U8Extractor) Name() string

Name returns the extractor name

type Media

type Media interface {
	GetID() string
	GetTitle() string
	GetUploader() string
	Type() MediaType
}

Media is the interface for all extracted media types

type MediaType

type MediaType string

MediaType represents the type of media being downloaded

const (
	MediaTypeVideo MediaType = "video"
	MediaTypeAudio MediaType = "audio"
	MediaTypeImage MediaType = "image"
)

type MultiVideoMedia

type MultiVideoMedia struct {
	ID       string
	Title    string
	Uploader string
	Videos   []*VideoMedia
}

MultiVideoMedia represents multiple videos from a single source (e.g., Twitter multi-video tweets)

func (*MultiVideoMedia) GetID

func (m *MultiVideoMedia) GetID() string

func (*MultiVideoMedia) GetTitle

func (m *MultiVideoMedia) GetTitle() string

func (*MultiVideoMedia) GetUploader

func (m *MultiVideoMedia) GetUploader() string

func (*MultiVideoMedia) Type

func (m *MultiVideoMedia) Type() MediaType

type TelegramDownloadOptions added in v0.11.2

type TelegramDownloadOptions = telegram.DownloadOptions

type TelegramDownloadResult

type TelegramDownloadResult = telegram.DownloadResult

Re-export types

type TelegramExtractor

type TelegramExtractor struct {
	// contains filtered or unexported fields
}

TelegramExtractor wraps the telegram.Extractor for registration

func (*TelegramExtractor) Extract

func (t *TelegramExtractor) Extract(urlStr string) (Media, error)

func (*TelegramExtractor) Match

func (t *TelegramExtractor) Match(u *url.URL) bool

func (*TelegramExtractor) Name

func (t *TelegramExtractor) Name() string

type TikTokExtractor

type TikTokExtractor struct{}

TikTokExtractor handles TikTok video downloads

func (*TikTokExtractor) Extract

func (e *TikTokExtractor) Extract(url string) (Media, error)

func (*TikTokExtractor) Match

func (e *TikTokExtractor) Match(u *url.URL) bool

func (*TikTokExtractor) Name

func (e *TikTokExtractor) Name() string

type TwitterError

type TwitterError struct {
	Code    string // "nsfw", "protected", "unavailable"
	Message string // Original message for fallback
}

Twitter-specific error types for i18n support

func (*TwitterError) Error

func (e *TwitterError) Error() string

type TwitterExtractor

type TwitterExtractor struct {
	// contains filtered or unexported fields
}

TwitterExtractor handles Twitter/X media extraction

func (*TwitterExtractor) Extract

func (t *TwitterExtractor) Extract(urlStr string) (Media, error)

Extract retrieves media from a Twitter/X URL

func (*TwitterExtractor) IsAuthenticated

func (t *TwitterExtractor) IsAuthenticated() bool

IsAuthenticated returns true if auth credentials are set

func (*TwitterExtractor) Match

func (t *TwitterExtractor) Match(u *url.URL) bool

Match checks if URL is a Twitter/X status URL

func (*TwitterExtractor) Name

func (t *TwitterExtractor) Name() string

Name returns the extractor name

func (*TwitterExtractor) SetAuth

func (t *TwitterExtractor) SetAuth(authToken string)

SetAuth sets authentication credentials for accessing restricted content

type VideoFormat

type VideoFormat struct {
	URL      string
	Quality  string // "1080p", "720p", etc.
	Ext      string // "mp4", "m3u8", "ts"
	Width    int
	Height   int
	Bitrate  int
	Headers  map[string]string // Custom headers for download (e.g., Referer)
	AudioURL string            // Separate audio stream URL (for adaptive formats that need merging)
}

VideoFormat represents a single video quality option

func (*VideoFormat) QualityLabel

func (f *VideoFormat) QualityLabel() string

QualityLabel returns a human-readable quality label

type VideoMedia

type VideoMedia struct {
	ID        string
	Title     string
	Uploader  string
	Duration  int // seconds
	Thumbnail string
	Formats   []VideoFormat
}

VideoMedia represents video content with multiple format options

func (*VideoMedia) GetID

func (v *VideoMedia) GetID() string

func (*VideoMedia) GetTitle

func (v *VideoMedia) GetTitle() string

func (*VideoMedia) GetUploader

func (v *VideoMedia) GetUploader() string

func (*VideoMedia) Type

func (v *VideoMedia) Type() MediaType

type XiaohongshuExtractor

type XiaohongshuExtractor struct {
	// contains filtered or unexported fields
}

XiaohongshuExtractor handles Xiaohongshu video/image downloads using browser automation

func (*XiaohongshuExtractor) Extract

func (e *XiaohongshuExtractor) Extract(rawURL string) (Media, error)

func (*XiaohongshuExtractor) Match

func (e *XiaohongshuExtractor) Match(u *url.URL) bool

func (*XiaohongshuExtractor) Name

func (e *XiaohongshuExtractor) Name() string

func (*XiaohongshuExtractor) SetVisible

func (e *XiaohongshuExtractor) SetVisible(visible bool)

SetVisible configures whether to show the browser window

type XiaoyuzhouExtractor

type XiaoyuzhouExtractor struct{}

XiaoyuzhouExtractor handles xiaoyuzhoufm.com podcast downloads

func (*XiaoyuzhouExtractor) Extract

func (e *XiaoyuzhouExtractor) Extract(url string) (Media, error)

func (*XiaoyuzhouExtractor) Match

func (e *XiaoyuzhouExtractor) Match(u *url.URL) bool

func (*XiaoyuzhouExtractor) Name

func (e *XiaoyuzhouExtractor) Name() string

type YouTubeDirectDownload

type YouTubeDirectDownload struct {
	URL       string
	OutputDir string
}

YouTubeDirectDownload indicates yt-dlp should handle the download directly

func (*YouTubeDirectDownload) GetID

func (y *YouTubeDirectDownload) GetID() string

Implement Media interface for YouTubeDirectDownload

func (*YouTubeDirectDownload) GetTitle

func (y *YouTubeDirectDownload) GetTitle() string

func (*YouTubeDirectDownload) GetUploader

func (y *YouTubeDirectDownload) GetUploader() string

func (*YouTubeDirectDownload) Type

func (y *YouTubeDirectDownload) Type() MediaType

type YouTubeDockerRequiredError

type YouTubeDockerRequiredError struct {
	URL string
}

YouTubeDockerRequiredError indicates YouTube extraction needs Docker

func (*YouTubeDockerRequiredError) Error

Directories

Path Synopsis
telegram: Package telegram provides Telegram media extraction and download functionality.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL