Documentation
¶
Index ¶
- Constants
- func NewTestSuits(t *testing.T) *testSuits
- type AbsProxyStorage
- type AnonymityLevel
- type BaseRule
- type CommandDownloadFailLimitRule
- type CommandFailedCntLimitRule
- type Context
- func (c *Context) Doc() *goquery.Document
- func (c *Context) Follow(link string, callback OnParseCallback, data ...H)
- func (c *Context) FollowWithBuilder(link string, callback OnParseCallback, fBuilder *FollowBuilder)
- func (c *Context) Get(key string) (interface{}, bool)
- func (c *Context) HTML(selector string, callback func(element *HTMLElement)) int
- func (c *Context) Item(item interface{})
- func (c *Context) MayDoc() (*goquery.Document, error)
- func (c *Context) MayHTML(selector string, callback func(element *HTMLElement)) (int, error)
- func (c *Context) NewFollowBuilder() *FollowBuilder
- func (c *Context) Retry()
- func (c *Context) SaveResource(link string, fileName string)
- func (c *Context) Set(key string, value interface{})
- type DownloadTimeoutRule
- type Executor
- type FollowBuilder
- func (b FollowBuilder) Callback(callback OnParseCallback) *commandBuilder
- func (b FollowBuilder) ContextData(data H) *commandBuilder
- func (b FollowBuilder) Cookie(key, val string) *commandBuilder
- func (b FollowBuilder) DownloadTimeout(timeout time.Duration) *commandBuilder
- func (b FollowBuilder) Link(link string) *commandBuilder
- func (b FollowBuilder) UserAgent(agent string) *commandBuilder
- type H
- type HTMLElement
- func (e *HTMLElement) Attr(key string) string
- func (e *HTMLElement) ChildAttr(selector, key string) string
- func (e *HTMLElement) ChildText(selector string) string
- func (e *HTMLElement) ChildrenAttrs(selector, attrname string) []string
- func (e *HTMLElement) ChildrenTexts(selector string) []string
- func (e *HTMLElement) ForEach(selector string, callback func(element *HTMLElement))
- func (e *HTMLElement) MayAttr(key string) string
- func (e *HTMLElement) MayChildAttr(selector, key string) string
- func (e *HTMLElement) MayChildText(selector string) string
- func (e *HTMLElement) MayChildrenAttrs(selector, attrname string) []string
- func (e *HTMLElement) MayChildrenTexts(selector string) []string
- func (e *HTMLElement) MayForEach(selector string, callback func(element *HTMLElement))
- func (e *HTMLElement) Text() string
- type JsonFilePipeline
- type JsonStdoutPipeline
- type NoProxyFastHTTPDownloaderFactory
- type OnDownloadFinishCallback
- type OnDownloadFinishRule
- type OnParseCallback
- type OnParseErrorCallback
- type OnParseErrorRule
- type OnPipeErrorCallback
- type OnPipeErrorRule
- type ParseErrorInfo
- type ParseErrorKind
- type PipeErrorInfo
- type PipeErrorKind
- type Pipeline
- type PipelineRule
- type Proxy
- type ProxyFastHTTPDownloaderFactory
- type ProxyPool
- type Task
- type TaskNameRule
Constants ¶
View Source
const ( DefaultDownloadTimeout = time.Second * 20 DefaultCommandFailedCntLimit = 60 )
Variables ¶
This section is empty.
Functions ¶
func NewTestSuits ¶
Types ¶
type AbsProxyStorage ¶
type AbsProxyStorage interface {
GetProxy(proxy *Proxy) (*Proxy, error)
HasProxy(proxy *Proxy) (bool, error)
CreateProxy(proxy *Proxy) error
CreateProxyList([]*Proxy) int
ActivateProxy(proxy *Proxy) error
DeactivateProxy(proxy *Proxy) error
GetTopKProxyList(k int) ([]*Proxy, error)
GetRandTopKProxy(k int) (*Proxy, error)
GetAllProxy() ([]*Proxy, error)
GetProxyListWithRefuseList(refuseList []*Proxy, count int) ([]*Proxy, error)
GetRandProxyWithRefuseList([]*Proxy) (*Proxy, error)
}
func ProxyStorageSingleton ¶
func ProxyStorageSingleton() AbsProxyStorage
type AnonymityLevel ¶
type AnonymityLevel int
const ( Transparent AnonymityLevel = iota Anonymous Elite )
type CommandDownloadFailLimitRule ¶
type CommandDownloadFailLimitRule interface {
CommandDownloadFailLimit() int
}
type CommandFailedCntLimitRule ¶
type CommandFailedCntLimitRule interface {
CommandFailedCntLimit() int
}
type Context ¶
type Context struct {
// contains filtered or unexported fields
}
func (*Context) FollowWithBuilder ¶
func (c *Context) FollowWithBuilder(link string, callback OnParseCallback, fBuilder *FollowBuilder)
func (*Context) HTML ¶
func (c *Context) HTML(selector string, callback func(element *HTMLElement)) int
func (*Context) Item ¶
func (c *Context) Item(item interface{})
Item adds a new item to be passed through the pipelines.
func (*Context) MayHTML ¶
func (c *Context) MayHTML(selector string, callback func(element *HTMLElement)) (int, error)
func (*Context) NewFollowBuilder ¶
func (c *Context) NewFollowBuilder() *FollowBuilder
func (*Context) SaveResource ¶
SaveResource saves the resource at link to instance/[taskName].[taskID]/[fileName].
type DownloadTimeoutRule ¶
type Executor ¶
type Executor struct {
// contains filtered or unexported fields
}
Executor is the main part of cobweb: it accepts rules and creates tasks according to them.
func NewDefaultExecutor ¶
func NewDefaultExecutor() *Executor
func NewExecutor ¶
func NewExecutorWithSimpleDownloaderManager ¶
func NewExecutorWithSimpleDownloaderManager() *Executor
func NewNoProxyDefaultExecutor ¶
func NewNoProxyDefaultExecutor() *Executor
func (*Executor) AcceptRule ¶
AcceptRule accepts a rule and creates a task for it. If the executor is not running, it returns nil.
type FollowBuilder ¶
type FollowBuilder struct {
// contains filtered or unexported fields
}
FollowBuilder implements the builder pattern for Context.FollowWithBuilder.
func (FollowBuilder) Callback ¶
func (b FollowBuilder) Callback(callback OnParseCallback) *commandBuilder
func (FollowBuilder) ContextData ¶
func (b FollowBuilder) ContextData(data H) *commandBuilder
func (FollowBuilder) DownloadTimeout ¶
type HTMLElement ¶
type HTMLElement struct {
// contains filtered or unexported fields
}
HTMLElement represents one element in an HTML document.
func (*HTMLElement) Attr ¶
func (e *HTMLElement) Attr(key string) string
func (*HTMLElement) ChildAttr ¶
func (e *HTMLElement) ChildAttr(selector, key string) string
func (*HTMLElement) ChildText ¶
func (e *HTMLElement) ChildText(selector string) string
func (*HTMLElement) ChildrenAttrs ¶
func (e *HTMLElement) ChildrenAttrs(selector, attrname string) []string
func (*HTMLElement) ChildrenTexts ¶
func (e *HTMLElement) ChildrenTexts(selector string) []string
func (*HTMLElement) ForEach ¶
func (e *HTMLElement) ForEach(selector string, callback func(element *HTMLElement))
func (*HTMLElement) MayAttr ¶
func (e *HTMLElement) MayAttr(key string) string
func (*HTMLElement) MayChildAttr ¶
func (e *HTMLElement) MayChildAttr(selector, key string) string
func (*HTMLElement) MayChildText ¶
func (e *HTMLElement) MayChildText(selector string) string
func (*HTMLElement) MayChildrenAttrs ¶
func (e *HTMLElement) MayChildrenAttrs(selector, attrname string) []string
func (*HTMLElement) MayChildrenTexts ¶
func (e *HTMLElement) MayChildrenTexts(selector string) []string
func (*HTMLElement) MayForEach ¶
func (e *HTMLElement) MayForEach(selector string, callback func(element *HTMLElement))
func (*HTMLElement) Text ¶
func (e *HTMLElement) Text() string
type JsonFilePipeline ¶
type JsonFilePipeline struct {
// contains filtered or unexported fields
}
func (*JsonFilePipeline) Close ¶
func (p *JsonFilePipeline) Close()
func (*JsonFilePipeline) Pipe ¶
func (p *JsonFilePipeline) Pipe(info *itemInfo)
type JsonStdoutPipeline ¶
type JsonStdoutPipeline struct {
}
func (*JsonStdoutPipeline) Close ¶
func (p *JsonStdoutPipeline) Close()
func (*JsonStdoutPipeline) Pipe ¶
func (p *JsonStdoutPipeline) Pipe(info *itemInfo)
type NoProxyFastHTTPDownloaderFactory ¶
type NoProxyFastHTTPDownloaderFactory struct {
}
type OnDownloadFinishCallback ¶
type OnDownloadFinishCallback func(ctx *Context)
type OnDownloadFinishRule ¶
type OnDownloadFinishRule interface {
OnDownloadFinish(ctx *Context)
}
type OnParseCallback ¶
type OnParseCallback func(ctx *Context)
type OnParseErrorCallback ¶
type OnParseErrorCallback func(info *ParseErrorInfo)
type OnParseErrorRule ¶
type OnParseErrorRule interface {
OnParseError(info *ParseErrorInfo)
}
type OnPipeErrorCallback ¶
type OnPipeErrorCallback func(info *PipeErrorInfo)
type OnPipeErrorRule ¶
type OnPipeErrorRule interface {
OnPipeError(info *PipeErrorInfo)
}
type ParseErrorInfo ¶
type ParseErrorInfo struct {
Ctx *Context
ErrKind ParseErrorKind
PanicValue interface{}
}
type ParseErrorKind ¶
type ParseErrorKind int
const ( UnknownParseError ParseErrorKind = iota ParseHTMLError HTMLNodeNotFoundError )
func (ParseErrorKind) String ¶
func (kind ParseErrorKind) String() string
type PipeErrorInfo ¶
type PipeErrorInfo struct {
Ctx *Context
ErrKind PipeErrorKind
PanicInfo interface{}
}
type PipelineRule ¶
type PipelineRule interface {
Pipelines() []Pipeline
}
type Proxy ¶
type Proxy struct {
ID int `gorm:"PRIMARY_KEY;AUTO_INCREMENT;"`
Host string
Port string
HTTPS bool
Anonymity AnonymityLevel
Score int
}
Proxy is the proxy struct (代理结构体).
func (*Proxy) FastHTTPDialHTTPProxy ¶
func (*Proxy) GetProxyURL ¶
type ProxyFastHTTPDownloaderFactory ¶
type ProxyFastHTTPDownloaderFactory struct {
}
type ProxyPool ¶
type ProxyPool struct {
// contains filtered or unexported fields
}
func NewProxyPool ¶
type TaskNameRule ¶
type TaskNameRule interface {
TaskName() string
}
Click to show internal directories.
Click to hide internal directories.