Documentation
¶
Index ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type Cache ¶
type Cache interface {
AddRequest(req *Request) error
VisitedURL(req *Request) (bool, error)
Clear() error
}
Cache holds visited URLs to prevent revisiting them.
type LocalCache ¶
type LocalCache struct {
// contains filtered or unexported fields
}
LocalCache holds URLs in maps. It is safe for use by multiple goroutines.
func NewCache ¶
func NewCache() *LocalCache
func (*LocalCache) AddRequest ¶
func (c *LocalCache) AddRequest(req *Request) error
AddRequest adds a request URL to the cache.
func (*LocalCache) Clear ¶
func (c *LocalCache) Clear() error
func (*LocalCache) VisitedURL ¶
func (c *LocalCache) VisitedURL(req *Request) (bool, error)
VisitedURL returns true if the request URL has been visited before.
type Queue ¶
type Queue interface {
io.Closer
// Enqueue adds the request to the queue, returns an error if no more space is available.
Enqueue(req *Request, priority int) error
// Dequeue pops the highest priority request from the queue.
Dequeue() <-chan QueueResult
// Count returns the amount of queued requests.
Count() (int, error)
Clear()
}
Queue is a prioritized FIFO queue for requests.
type QueueMaxSize ¶
type QueueMaxSize struct {
// contains filtered or unexported fields
}
QueueMaxSize signals the Queue has reached its maximum size.
func (QueueMaxSize) Error ¶
func (r QueueMaxSize) Error() string
type QueueResult ¶
type RedisCache ¶
type RedisCache struct {
// contains filtered or unexported fields
}
func NewRedisCache ¶
func (*RedisCache) AddRequest ¶
func (r *RedisCache) AddRequest(req *Request) error
func (*RedisCache) Clear ¶
func (r *RedisCache) Clear() error
func (*RedisCache) VisitedURL ¶
func (r *RedisCache) VisitedURL(req *Request) (bool, error)
type RedisQueue ¶
type RedisQueue struct {
// contains filtered or unexported fields
}
func NewRedisQueue ¶
func (*RedisQueue) Clear ¶
func (r *RedisQueue) Clear()
func (*RedisQueue) Close ¶
func (r *RedisQueue) Close() error
func (*RedisQueue) Count ¶
func (r *RedisQueue) Count() (int, error)
func (*RedisQueue) Dequeue ¶
func (r *RedisQueue) Dequeue() <-chan QueueResult
type Request ¶
Request contains the to-be-visited URL as well as the origin domain.
func NewRequest ¶
NewRequest returns a Request with an absolute URL, converting relative URLs to absolute ones as needed. It returns an error if the URL could not be parsed.
func (*Request) MarshalJSON ¶
type RequestHeapQueue ¶
type RequestHeapQueue struct {
// contains filtered or unexported fields
}
RequestHeapQueue is a heap implementation for request.Queue.
func BuildHeap ¶
func BuildHeap(data []heapNode, maxSize int) *RequestHeapQueue
BuildHeap builds a request heap from existing data.
func NewRequestHeap ¶
func NewRequestHeap(maxSize int) *RequestHeapQueue
NewRequestHeap returns a request heap (priority queue).
func (*RequestHeapQueue) Clear ¶
func (r *RequestHeapQueue) Clear()
func (*RequestHeapQueue) Close ¶
func (r *RequestHeapQueue) Close() error
func (*RequestHeapQueue) Count ¶
func (r *RequestHeapQueue) Count() (int, error)
Count returns the number of requests in the queue.
func (*RequestHeapQueue) Dequeue ¶
func (r *RequestHeapQueue) Dequeue() <-chan QueueResult
type Response ¶
Response holds the original Request, as well as the http Response and goquery document. Response instances can be searched using goquery methods.
func NewResponse ¶
NewResponse returns a Response. Returns an error if the response body could not be parsed by goquery.