fetch

package
v0.0.2 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 15, 2026 License: MIT Imports: 18 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type CacheEntry

// CacheEntry is a single cache entry. Its fields are serialized to JSON
// under the keys given in the struct tags.
type CacheEntry struct {
	URL       string `json:"url"`
	File      string `json:"file"`       // cache file name (MD5.html)
	FetchedAt string `json:"fetched_at"` // fetch time
	Status    int    `json:"status"`     // HTTP status code (reserved)
}

CacheEntry 缓存条目

type CacheManager

// CacheManager is the HTML cache manager.
type CacheManager struct {
	// contains filtered or unexported fields
}

CacheManager HTML缓存管理器

func NewCacheManager

func NewCacheManager(outputDir string) (*CacheManager, error)

NewCacheManager 创建缓存管理器

func (*CacheManager) Get

func (cm *CacheManager) Get(url string) (string, bool)

Get 从缓存获取HTML

func (*CacheManager) GetCachePath

func (cm *CacheManager) GetCachePath(url string) string

GetCachePath 获取缓存文件路径

func (*CacheManager) GetStats

func (cm *CacheManager) GetStats() map[string]interface{}

GetStats 获取缓存统计

func (*CacheManager) Has

func (cm *CacheManager) Has(url string) bool

Has 检查缓存是否存在

func (*CacheManager) Set

func (cm *CacheManager) Set(url, html string) error

Set 保存HTML到缓存

type CacheMeta

// CacheMeta is the cache metadata: a collection of cache entries
// serialized under the JSON key "entries".
type CacheMeta struct {
	// NOTE(review): the map key is presumably the page URL — confirm against CacheManager.
	Entries map[string]*CacheEntry `json:"entries"`
}

CacheMeta 缓存元数据

type CatalogConfig

// CatalogConfig is the catalog configuration (generated by the scan command).
type CatalogConfig struct {
	Site      string         `json:"site"`
	URL       string         `json:"url"`
	Selectors Selectors      `json:"selectors"`
	Items     []*CatalogItem `json:"items"`
}

CatalogConfig 目录配置(从scan命令生成)

type CatalogItem

// CatalogItem is a catalog item. Items form a tree through Children.
type CatalogItem struct {
	Title    string         `json:"title"`
	URL      string         `json:"url"`
	Children []*CatalogItem `json:"children,omitempty"` // nested items; omitted from JSON when empty
}

CatalogItem 目录项

type Config

// Config is the configuration for the fetch command.
type Config struct {
	Input     string // path to catalog.json
	Output    string // output directory
	Workers   int    // concurrency level
	Retries   int    // retry count
	Timeout   int    // timeout (NOTE(review): unit is not stated here — confirm)
	AppConfig *models.AppConfig
}

Config fetch命令配置

type FetchResult

// FetchResult is the result of a fetch.
type FetchResult struct {
	Title      string
	URL        string
	OutputPath string
	WordCount  int
	Success    bool
	Error      error
}

FetchResult 抓取结果

type Processor

// Processor is the fetch processor.
type Processor struct {
	// contains filtered or unexported fields
}

Processor fetch处理器

func NewProcessor

func NewProcessor(config *Config, logger *zap.Logger) (*Processor, error)

NewProcessor 创建fetch处理器

func (*Processor) Execute

func (p *Processor) Execute(ctx context.Context) error

Execute 执行抓取

func (*Processor) GetProgress

func (p *Processor) GetProgress() *Progress

GetProgress 获取当前进度

type Progress

// Progress holds progress information.
type Progress struct {
	Total     int32
	Completed int32
	Failed    int32
	Current   string
}

Progress 进度信息

type Selectors

// Selectors is the selector configuration.
type Selectors struct {
	TOC         string `json:"toc"`
	Content     string `json:"content"`
	TOCType     string `json:"toc_type,omitempty"`     // TOC selector type: css/xpath
	ContentType string `json:"content_type,omitempty"` // content selector type: css/xpath
}

Selectors 选择器配置

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL