Documentation
¶
Index ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type CacheEntry ¶
// CacheEntry is a single cache entry. Its fields are serialized to JSON
// under the keys given in the struct tags.
type CacheEntry struct {
URL string `json:"url"`
File string `json:"file"` // cache file name (MD5.html)
FetchedAt string `json:"fetched_at"` // fetch time, stored as a string (format not shown here)
Status int `json:"status"` // HTTP status code (reserved)
}
CacheEntry 缓存条目
type CacheManager ¶
// CacheManager is the HTML cache manager.
type CacheManager struct {
// contains filtered or unexported fields
}
CacheManager HTML缓存管理器
func NewCacheManager ¶
func NewCacheManager(outputDir string) (*CacheManager, error)
NewCacheManager 创建缓存管理器
func (*CacheManager) GetCachePath ¶
func (cm *CacheManager) GetCachePath(url string) string
GetCachePath 获取缓存文件路径
func (*CacheManager) GetStats ¶
func (cm *CacheManager) GetStats() map[string]interface{}
GetStats 获取缓存统计
type CacheMeta ¶
// CacheMeta is the cache metadata: a collection of cache entries serialized
// to JSON under the "entries" key.
type CacheMeta struct {
// NOTE(review): the meaning of the map key is not shown here —
// presumably the entry's URL; confirm against the code that fills it.
Entries map[string]*CacheEntry `json:"entries"`
}
CacheMeta 缓存元数据
type CatalogConfig ¶
// CatalogConfig is the catalog configuration (generated by the scan command).
// Its fields are serialized to JSON under the keys given in the struct tags.
type CatalogConfig struct {
Site string `json:"site"`
URL string `json:"url"`
Selectors Selectors `json:"selectors"` // Selectors is declared elsewhere in the package
Items []*CatalogItem `json:"items"` // top-level catalog items
}
CatalogConfig 目录配置(从scan命令生成)
type CatalogItem ¶
// CatalogItem is a catalog item. Items nest recursively through Children.
type CatalogItem struct {
Title string `json:"title"`
URL string `json:"url"`
Children []*CatalogItem `json:"children,omitempty"` // nested items; omitted from JSON when empty
}
CatalogItem 目录项
type Config ¶
// Config is the configuration for the fetch command.
type Config struct {
Input string // path to catalog.json
Output string // output directory
Workers int // number of concurrent workers
Retries int // number of retries
Timeout int // timeout; NOTE(review): unit is not shown here — confirm (seconds?)
AppConfig *models.AppConfig
}
Config fetch命令配置
type FetchResult ¶
// FetchResult is the result of a fetch.
type FetchResult struct {
Title string
URL string
OutputPath string
WordCount int
Success bool
// NOTE(review): presumably set when Success is false — confirm against
// the code that produces FetchResult values.
Error error
}
FetchResult 抓取结果
type Processor ¶
// Processor is the processor for the fetch command.
type Processor struct {
// contains filtered or unexported fields
}
Processor fetch处理器
func NewProcessor ¶
NewProcessor 创建fetch处理器
Click to show internal directories.
Click to hide internal directories.