Documentation
¶
Index ¶
- Constants
- Variables
- func Banner()
- func CreateSeedJobs(fastmode bool, langCode string, r io.Reader, maxDepth int, email bool, ...) (jobs []scrapemate.IJob, err error)
- func CreateSeedJobsFromKeywords(cfg SeedJobConfig) ([]scrapemate.IJob, error)
- func FormatGeoCoordinates(lat, lon float64) string
- func LoadCustomWriter(pluginDir, pluginName string) (scrapemate.ResultWriter, error)
- func Telemetry() tlmt.Telemetry
- type Config
- type Runner
- type S3Uploader
- type SeedJobConfig
Constants ¶
View Source
// Run mode identifiers. Numbering starts at 1 (iota + 1), so the zero value
// of an int does not match any declared mode.
// NOTE(review): presumably these are the values stored in Config.RunMode — confirm.
const (
	RunModeFile = iota + 1
	RunModeDatabase
	RunModeDatabaseProduce
	RunModeInstallPlaywright
	RunModeAwsLambda
	RunModeAwsLambdaInvoker
	RunModeManager
	RunModeWorker
)
Variables ¶
View Source
// Build metadata. Each variable carries a placeholder default that is
// replaced at build time via -ldflags.
var (
	// Version is the current application version.
	// It is injected at build time via -ldflags.
	Version = "dev"

	// BuildDate is the timestamp of the build.
	// It is injected at build time via -ldflags.
	BuildDate = "unknown"

	// Commit is the git commit hash.
	// It is injected at build time via -ldflags.
	Commit = "none"
)
View Source
// ErrInvalidRunMode is the sentinel error carrying the message
// "invalid run mode". Compare against it with errors.Is.
var ErrInvalidRunMode = errors.New("invalid run mode")
Functions ¶
func CreateSeedJobs ¶
func CreateSeedJobsFromKeywords ¶
func CreateSeedJobsFromKeywords(cfg SeedJobConfig) ([]scrapemate.IJob, error)
CreateSeedJobsFromKeywords creates seed jobs from a slice of keywords. This is a reusable wrapper for CreateSeedJobs that accepts []string instead of io.Reader. Used by both CLI and API (Dashboard).
func FormatGeoCoordinates ¶
FormatGeoCoordinates formats latitude and longitude into a string. Returns empty string if both are zero.
func LoadCustomWriter ¶
func LoadCustomWriter(pluginDir, pluginName string) (scrapemate.ResultWriter, error)
Types ¶
type Config ¶
// Config gathers the application's options into a single flat struct.
// ParseConfig returns a *Config.
//
// Field order is unchanged from the original declaration; adjacent fields
// that share a type are declared together.
type Config struct {
	Concurrency              int
	CacheDir                 string
	MaxDepth                 int
	InputFile, ResultsFile   string
	JSON                     bool
	LangCode                 string
	Debug                    bool
	Dsn                      string
	ProduceOnly              bool
	ExitOnInactivityDuration time.Duration
	Email                    bool

	CustomWriter, GeoCoordinates string
	Zoom, RunMode                int
	DisableTelemetry             bool
	// NOTE(review): "Lamdba" is misspelt, but it is part of the exported
	// field name and is kept so existing callers keep compiling.
	AwsLamdbaRunner bool
	DataFolder      string
	Proxies         []string

	AwsAccessKey, AwsSecretKey, AwsRegion string
	S3Uploader                            S3Uploader
	S3Bucket                              string
	AwsLambdaInvoker                      bool
	FunctionName                          string
	AwsLambdaChunkSize                    int

	FastMode                       bool
	Radius                         float64
	Addr                           string
	DisablePageReuse, ExtraReviews bool
	LeadsDBAPIKey                  string

	// Manager/Worker mode flags.
	ManagerMode, WorkerMode bool
	ManagerURL, WorkerID    string

	// StaticFolder is the path to the static frontend files.
	StaticFolder string

	// Redis configuration for cache and deduplication.
	RedisURL, RedisAddr, RedisPass string
	RedisDB                        int

	// RabbitMQURL configures RabbitMQ for the job queue.
	RabbitMQURL string

	// ProxyGate flags.
	ProxyGateEnabled         bool
	ProxyGateAddr            string
	ProxyGateSources         []string
	ProxyGateRefreshInterval time.Duration

	// Email validation (Mordibouncer).
	EmailValidatorURL, EmailValidatorKey string

	// Migration flags.

	// Migrate runs the migration only, then exits.
	Migrate bool
	// MigrateStatus checks the migration status and exits.
	MigrateStatus bool

	// Auto-spawn configuration (Manager mode).

	// SpawnerType is one of: none, docker, swarm, lambda.
	SpawnerType string
	// SpawnerImage is the Docker image for worker containers.
	SpawnerImage string
	// SpawnerNetwork is the Docker network to attach workers to.
	SpawnerNetwork string
	// SpawnerConcurrency is the concurrency per spawned worker.
	SpawnerConcurrency int
	// SpawnerMaxWorkers caps concurrent workers (0 = unlimited).
	SpawnerMaxWorkers int
	// SpawnerAutoRemove auto-removes containers after exit.
	SpawnerAutoRemove bool
	// SpawnerLabels are the labels for spawned containers.
	SpawnerLabels map[string]string
	// SpawnerConstraints are Swarm placement constraints.
	SpawnerConstraints []string
	// SpawnerManagerURL is the manager URL for spawned workers
	// (default: auto-detect).
	SpawnerManagerURL string
	// SpawnerProxies is the proxy URL for spawned workers
	// (e.g., socks5://manager:8081).
	SpawnerProxies string

	// AWS Lambda spawner configuration.

	// SpawnerLambdaFunction is the Lambda function name/ARN.
	SpawnerLambdaFunction string
	// SpawnerLambdaRegion is the AWS region (defaults to AwsRegion).
	SpawnerLambdaRegion string
	// SpawnerLambdaInvocation is Event (async) or RequestResponse (sync).
	SpawnerLambdaInvocation string
	// SpawnerLambdaMaxConc is the max number of concurrent Lambda invocations.
	SpawnerLambdaMaxConc int
}
func ParseConfig ¶
func ParseConfig() *Config
type S3Uploader ¶
type SeedJobConfig ¶
// SeedJobConfig is the argument type of CreateSeedJobsFromKeywords, which
// creates seed jobs from a slice of keywords.
type SeedJobConfig struct {
// Keywords is the slice of keywords the seed jobs are created from.
Keywords []string
FastMode bool
LangCode string
// NOTE(review): presumably the counterpart of CreateSeedJobs' maxDepth
// parameter — confirm against the implementation.
Depth int
Email bool
GeoCoordinates string // "lat,lon" or ""
Zoom int
Radius float64
ExtraReviews bool
// Collaborators whose types come from the deduper, exiter, and
// emailvalidator packages. NOTE(review): whether any of them may be
// left nil is not visible here — confirm.
Dedup deduper.Deduper
ExitMonitor exiter.Exiter
EmailValidator emailvalidator.Validator
}
SeedJobConfig holds the parameters for creating seed jobs from the API.
Click to show internal directories.
Click to hide internal directories.