Versions in this module Expand all Collapse all v0 v0.5.0 Mar 23, 2023 Changes in this version + func NewRdbClient(config *DistributedWorkerConfig) *redis.Client + func NewRdbClusterCLient(config *WorkerConfigWithRdbCluster) *redis.ClusterClient + func NewRdbConfig(config *DistributedWorkerConfig) *redis.Options + type CrawlMetricCollector struct + func NewCrawlMetricCollector(serverURL string, token string, bucket string, org string) *CrawlMetricCollector + func (c *CrawlMetricCollector) Get(metric string) uint64 + func (c *CrawlMetricCollector) GetAllStats() map[string]uint64 + func (c *CrawlMetricCollector) Incr(metric string) + func (c *CrawlMetricCollector) SetCurrentSpider(spider tegenaria.SpiderInterface) + type DistributeDupefilterOptions func(w *DistributedDupefilter) + type DistributeOptions func(w *DistributedWorkerConfig) + func DistributedWithBloomN(bloomN int) DistributeOptions + func DistributedWithBloomP(bloomP float64) DistributeOptions + func DistributedWithConnectionsSize(size int) DistributeOptions + func DistributedWithGetBFKey(keyFunc GetRDBKey) DistributeOptions + func DistributedWithGetLimitKey(keyFunc GetRDBKey) DistributeOptions + func DistributedWithGetQueueKey(keyFunc GetRDBKey) DistributeOptions + func DistributedWithLimiterRate(rate int) DistributeOptions + func DistributedWithRdbMaxRetry(retry int) DistributeOptions + func DistributedWithRdbTimeout(timeout time.Duration) DistributeOptions + func DistributedWithSlave() DistributeOptions + type DistributeQueueOptions func(w *DistributedQueue) + type DistributedComponents struct + func NewDefaultDistributedComponents(opts ...DistributeOptions) *DistributedComponents + func NewDistributedComponents(config *DistributedWorkerConfig, worker tegenaria.DistributedWorkerInterface, ...) *DistributedComponents + func (d *DistributedComponents) CheckWorkersStop() bool + func (d *DistributedComponents) GetDupefilter() tegenaria.RFPDupeFilterInterface + func (d *DistributedComponents) GetEventHooks() tegenaria.EventHooksInterface + func (d *DistributedComponents) GetLimiter() tegenaria.LimitInterface + func (d *DistributedComponents) GetQueue() tegenaria.CacheInterface + func (d *DistributedComponents) GetStats() tegenaria.StatisticInterface + func (d *DistributedComponents) SetCurrentSpider(spider tegenaria.SpiderInterface) + func (d *DistributedComponents) SpiderBeforeStart(engine *tegenaria.CrawlEngine, spider tegenaria.SpiderInterface) error + type DistributedDupefilter struct + func NewDistributedDupefilter(bloomN int, bloomP float64, rdb redis.Cmdable, bfKeyFunc GetRDBKey) *DistributedDupefilter + func (d *DistributedDupefilter) Add(fingerprint []byte) error + func (d *DistributedDupefilter) DoDupeFilter(ctx *tegenaria.Context) (bool, error) + func (d *DistributedDupefilter) Fingerprint(ctx *tegenaria.Context) ([]byte, error) + func (d *DistributedDupefilter) SetCurrentSpider(spider tegenaria.SpiderInterface) + func (d *DistributedDupefilter) TestOrAdd(fingerprint []byte) (bool, error) + type DistributedHooks struct + func NewDistributedHooks(worker tegenaria.DistributedWorkerInterface) *DistributedHooks + func (d *DistributedHooks) Error(params ...interface{}) error + func (d *DistributedHooks) EventsWatcher(ch chan tegenaria.EventType) error + func (d *DistributedHooks) Exit(params ...interface{}) error + func (d *DistributedHooks) Heartbeat(params ...interface{}) error + func (d *DistributedHooks) Pause(params ...interface{}) error + func (d *DistributedHooks) SetCurrentSpider(spider tegenaria.SpiderInterface) + func (d *DistributedHooks) Start(params ...interface{}) error + type DistributedQueue struct + func NewDistributedQueue(rdb redis.Cmdable, queueKey GetRDBKey) *DistributedQueue + func (d *DistributedQueue) Close() error + func (d *DistributedQueue) Dequeue() (interface{}, error) + func (d *DistributedQueue) Enqueue(ctx *tegenaria.Context) error + func (d *DistributedQueue) GetSize() uint64 + func (d *DistributedQueue) IsEmpty() bool + func (d *DistributedQueue) SetCurrentSpider(spider tegenaria.SpiderInterface) + type DistributedWorker struct + func NewDistributedWorker(config *DistributedWorkerConfig) *DistributedWorker + func NewWorkerWithRdbCluster(config *WorkerConfigWithRdbCluster) *DistributedWorker + func (w *DistributedWorker) AddNode() error + func (w *DistributedWorker) CheckAllNodesStop() (bool, error) + func (w *DistributedWorker) CheckMasterLive() (bool, error) + func (w *DistributedWorker) DelMaster() error + func (w *DistributedWorker) DelNode() error + func (w *DistributedWorker) GetRDB() redis.Cmdable + func (w *DistributedWorker) GetWorkerID() string + func (w *DistributedWorker) Heartbeat() error + func (w *DistributedWorker) IsMaster() bool + func (w *DistributedWorker) PauseNode() error + func (w *DistributedWorker) SetCurrentSpider(spider tegenaria.SpiderInterface) + func (w *DistributedWorker) SetMaster(flag bool) + type DistributedWorkerConfig struct + LimiterRate int + func NewDistributedWorkerConfig(rdbConfig *RedisConfig, influxdbConfig *InfluxdbConfig, ...) *DistributedWorkerConfig + type GetRDBKey func() (string, time.Duration) + type InfluxdbConfig struct + func NewInfluxdbConfig(server string, token string, bucket string, org string) *InfluxdbConfig + type LeakyBucketLimiterWithRdb struct + func NewLeakyBucketLimiterWithRdb(safetyLevel int, rdb redis.Cmdable, keyFunc GetRDBKey) *LeakyBucketLimiterWithRdb + func (l *LeakyBucketLimiterWithRdb) CheckAndWaitLimiterPass() error + func (l *LeakyBucketLimiterWithRdb) SetCurrentSpider(spider tegenaria.SpiderInterface) + type RdbNodes []string + type RedisConfig struct + RdbConnectionsSize uint64 + RdbMaxRetry int + RdbTimeout time.Duration + RedisAddr string + RedisDB uint32 + RedisPasswd string + RedisUsername string + func NewRedisConfig(addr string, username string, passwd string, db uint32) *RedisConfig + type WorkerConfigWithRdbCluster struct + func NewWorkerConfigWithRdbCluster(config *DistributedWorkerConfig, nodes RdbNodes) *WorkerConfigWithRdbCluster