Documentation
¶
Index ¶
- type Crawler
- func (c *Crawler) RegisterTaskTypes(dummyTasks ...Task) (err error)
- func (c *Crawler) Request(tasks ...Task) (err error)
- func (c *Crawler) RequestWithOptions(task Task, opts RequestOptions) (err error)
- func (c *Crawler) Run()
- func (c *Crawler) TaskLog(level logger.Level, message string, values logger.Fields)
- func (c *Crawler) WaitQuit() (errs []error)
- type CrawlerOptions
- type FilterResult
- type RequestOptions
- type Task
- type WithFilter
- type WithPayloadDecorator
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type Crawler ¶
// Crawler is the crawler.
type Crawler struct {
// Global is an exported map from string keys to arbitrary values.
// NOTE(review): its intended use is not documented here — presumably
// shared state accessible to tasks; confirm against the implementation.
Global map[string]interface{}
// contains filtered or unexported fields
}
Crawler 是爬虫
func NewCrawler ¶
func NewCrawler(opts CrawlerOptions) (c *Crawler, err error)
NewCrawler 返回一个新的 Crawler
func (*Crawler) RegisterTaskTypes ¶
RegisterTaskTypes 注册任务类型
func (*Crawler) RequestWithOptions ¶
func (c *Crawler) RequestWithOptions(task Task, opts RequestOptions) (err error)
RequestWithOptions 添加一个任务请求, 带有设置
type CrawlerOptions ¶
// CrawlerOptions holds the configuration options for a crawler.
type CrawlerOptions struct {
// Dir is the crawler's working directory. If Dir already exists, the
// crawler tries to restore its working state from it; if it does not
// exist, it is created.
Dir string
// NoDefaultStdoutLogger, if true, disables the default stdout logger.
NoDefaultStdoutLogger bool
// DefaultStdoutLoggerLogLevel is the level of the crawler's default stdout logger.
// A value of 0 is treated as LInfo.
DefaultStdoutLoggerLogLevel logger.Level
// NoDefaultFileLogger, if true, disables the default file logger.
NoDefaultFileLogger bool
// DefaultFileLoggerLogLevel is the level of the crawler's default file logger.
// A value of 0 is treated as LInfo.
DefaultFileLoggerLogLevel logger.Level
// Logger is a user-supplied logger. When the default loggers are disabled
// it is the only logger; otherwise it coexists with the default loggers.
// NOTE(review): the original comment referred to a "RemoveDefaultLogger"
// option, which is not a field of this struct — presumably it means
// NoDefaultStdoutLogger and NoDefaultFileLogger; confirm.
Logger logger.Logger
// Scheduler, if non-nil, replaces the crawler's default scheduler.
Scheduler scheduler.Scheduler
// DefaultWorkerPoolWorkers is the number of workers in the default task pool.
DefaultWorkerPoolWorkers int
// Mode is a scheduler.Mode value.
// NOTE(review): its effect is not documented here — TODO describe the
// available modes and the default.
Mode scheduler.Mode
}
CrawlerOptions 包含了爬虫的各类设置选项
type FilterResult ¶
// FilterResult is the result returned by a filter.
type FilterResult int
FilterResult 是 filter 返回的结果
const ( // FilterResultPass 同 ProcessResultSuccessful FilterResultPass FilterResult = FilterResult(taskqueue.ProcessResultSuccessful) // FilterResultShouldBeExcluded 同 ProcessResultShouldBeExcluded FilterResultShouldBeExcluded FilterResult = FilterResult(taskqueue.ProcessResultShouldBeExcluded) // FilterResultShouldBeFrozen 同 ProcessResultShouldBeFrozen FilterResultShouldBeFrozen FilterResult = FilterResult(taskqueue.ProcessResultShouldBeFrozen) )
type Task ¶
// Task is the interface for crawler tasks.
type Task interface {
// Fetch receives the crawler and a Task value (self) and returns a payload
// or an error.
// NOTE(review): self is presumably the task instance itself — confirm.
Fetch(c *Crawler, self Task) (payload interface{}, err error)
// Process receives the crawler, a Task value (self) and a payload, and
// returns a taskqueue.ProcessResult or an error.
// NOTE(review): payload is presumably the value produced by Fetch — confirm.
Process(c *Crawler, self Task, payload interface{}) (result taskqueue.ProcessResult, err error)
}
Task 是爬虫任务的接口
type WithFilter ¶
// WithFilter is an optional interface for tasks: a task that implements it
// has its Filter method called so the task can be filtered.
type WithFilter interface {
// Filter receives the crawler and a Task value (self) and returns a
// FilterResult or an error.
Filter(c *Crawler, self Task) (FilterResult, error)
}
WithFilter 实现此接口的任务会被调用 filter 以进行过滤
type WithPayloadDecorator ¶
// WithPayloadDecorator is implemented by tasks that decorate the payload
// returned by fetch.
type WithPayloadDecorator interface {
// DecoratePayload receives the crawler, a Task value (self) and a payload,
// and returns the decorated payload or an error.
DecoratePayload(c *Crawler, self Task, payload interface{}) (decoratedPayload interface{}, err error)
}
WithPayloadDecorator 对 fetch 返回的 payload 进行装饰
Click to show internal directories.
Click to hide internal directories.