Documentation
¶
Index ¶
- func ApplyResourceLimits(cfg *Config)
- func DefaultDataDir() (string, error)
- func SetDefaults()
- func WriteConfig() error
- type ClickHouseConfig
- type Config
- type CrawlerConfig
- type GSCConfig
- type JSRenderConfig
- type RateLimitConfig
- type ResourcesConfig
- type RetryConfig
- type ServerConfig
- type StorageConfig
- type TelemetryConfig
- type ThemeConfig
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func ApplyResourceLimits ¶
func ApplyResourceLimits(cfg *Config)
ApplyResourceLimits configures GOMAXPROCS and the memory soft limit based on cfg. It also auto-adjusts the worker count if memory is constrained.
func DefaultDataDir ¶ added in v0.8.0
DefaultDataDir returns the platform-specific application data directory:
- macOS: ~/Library/Application Support/CrawlObserver
- Linux: ~/.local/share/crawlobserver
- Windows: %APPDATA%/CrawlObserver
func SetDefaults ¶
func SetDefaults()
func WriteConfig ¶ added in v0.8.3
func WriteConfig() error
WriteConfig writes the current viper config to disk, creating it if needed.
Types ¶
type ClickHouseConfig ¶
// ClickHouseConfig is the ClickHouse section of the configuration. Each field
// is decoded from the mapstructure key named in its tag. The DSN method renders
// these settings as a redacted connection string that is safe to log.
type ClickHouseConfig struct {
Host string `mapstructure:"host"`
Port int `mapstructure:"port"`
Database string `mapstructure:"database"`
Username string `mapstructure:"username"`
// NOTE(review): presumably the secret that DSN redacts; confirm in DSN's implementation.
Password string `mapstructure:"password"`
// For the three fields below, the empty string means "choose automatically":
// auto-detection for Mode and BinaryPath, the platform default for DataDir.
Mode string `mapstructure:"mode"` // "managed" | "external" | "" (auto-detect)
BinaryPath string `mapstructure:"binary_path"` // path to clickhouse binary, "" = auto-detect
DataDir string `mapstructure:"data_dir"` // data directory, "" = platform default
}
func (ClickHouseConfig) DSN ¶
func (c ClickHouseConfig) DSN() string
DSN returns a redacted connection string safe for logging.
type Config ¶
// Config aggregates the per-area configuration sections. Each field is decoded
// from the top-level key named in its mapstructure tag into the typed section
// for that key.
type Config struct {
Crawler CrawlerConfig `mapstructure:"crawler"`
ClickHouse ClickHouseConfig `mapstructure:"clickhouse"`
Storage StorageConfig `mapstructure:"storage"`
Resources ResourcesConfig `mapstructure:"resources"`
Server ServerConfig `mapstructure:"server"`
Theme ThemeConfig `mapstructure:"theme"`
GSC GSCConfig `mapstructure:"gsc"`
Telemetry TelemetryConfig `mapstructure:"telemetry"`
// SetupComplete is the only scalar stored at the top level.
// NOTE(review): presumably records that first-run setup has finished; confirm against the setup flow.
SetupComplete bool `mapstructure:"setup_complete"`
}
type CrawlerConfig ¶
// CrawlerConfig is the "crawler" section of Config. Each field is decoded from
// the mapstructure key named in its tag. Retry and JSRender are nested
// subsections with their own types.
type CrawlerConfig struct {
Workers int `mapstructure:"workers"`
Delay time.Duration `mapstructure:"delay"`
MaxPages int `mapstructure:"max_pages"`
MaxDepth int `mapstructure:"max_depth"`
Timeout time.Duration `mapstructure:"timeout"`
UserAgent string `mapstructure:"user_agent"`
// NOTE(review): unit is presumably bytes; confirm where the body is read.
MaxBodySize int64 `mapstructure:"max_body_size"`
RespectRobots bool `mapstructure:"respect_robots"`
StoreHTML bool `mapstructure:"store_html"`
CrawlScope string `mapstructure:"crawl_scope"` // "host" (default), "domain" (eTLD+1), or "subdirectory"
AllowPrivateIPs bool `mapstructure:"allow_private_ips"` // allow crawling private/reserved IPs (default: false)
TLSProfile string `mapstructure:"tls_profile"` // "", "chrome", "firefox", "edge"
SourceIP string `mapstructure:"source_ip"` // local IP to bind outgoing connections
ForceIPv4 bool `mapstructure:"force_ipv4"` // force IPv4-only DNS and connections
// For the three limits below, the inline "0 = N" note gives the effective
// value used when the field is left at zero.
MaxConcurrentSessions int `mapstructure:"max_concurrent_sessions"` // 0 = 20
MaxFrontierSize int `mapstructure:"max_frontier_size"` // 0 = 5_000_000
MaxWorkers int `mapstructure:"max_workers"` // 0 = 100
Retry RetryConfig `mapstructure:"retry"`
JSRender JSRenderConfig `mapstructure:"js_render"`
}
type JSRenderConfig ¶
// JSRenderConfig is the "js_render" subsection of CrawlerConfig. It holds the
// settings for rendering pages in Chrome; the inline notes list each field's
// accepted values and default.
type JSRenderConfig struct {
Mode string `mapstructure:"mode"` // "off" (default), "auto", "always"
// Unlike CrawlerConfig.MaxPages, this is a concurrency limit on Chrome pages.
MaxPages int `mapstructure:"max_pages"` // concurrent Chrome pages (default: 4)
PageTimeout time.Duration `mapstructure:"page_timeout"` // per-page timeout (default: 15s)
BlockResources bool `mapstructure:"block_resources"` // block images/fonts (default: true)
}
type RateLimitConfig ¶
type ResourcesConfig ¶
type RetryConfig ¶
// RetryConfig is the "retry" subsection of CrawlerConfig. Each field is decoded
// from the mapstructure key named in its tag.
//
// NOTE(review): the field semantics below are inferred from the names only;
// confirm against the retry implementation.
type RetryConfig struct {
// Presumably the maximum number of retries per request.
MaxRetries int `mapstructure:"max_retries"`
// Presumably the lower and upper bounds of the delay between retries.
BaseDelay time.Duration `mapstructure:"base_delay"`
MaxDelay time.Duration `mapstructure:"max_delay"`
// Presumably thresholds for giving up: a count of consecutive failures and an overall error ratio.
MaxConsecutiveFails int `mapstructure:"max_consecutive_fails"`
MaxGlobalErrorRate float64 `mapstructure:"max_global_error_rate"`
}
type ServerConfig ¶
// ServerConfig is the "server" section of Config. Each field is decoded from
// the mapstructure key named in its tag, except the two transient flags at the
// end, which are tagged "-".
type ServerConfig struct {
Host string `mapstructure:"host"`
Port int `mapstructure:"port"`
Username string `mapstructure:"username"`
Password string `mapstructure:"password"`
SQLitePath string `mapstructure:"sqlite_path"`
RateLimit RateLimitConfig `mapstructure:"rate_limit"`
// The "-" tag keeps the two flags below out of mapstructure decoding; per the
// inline notes they are runtime-only state and are never persisted.
// NOTE(review): presumably set while validating or generating Password; confirm at the call site.
PasswordGenerated bool `mapstructure:"-"` // transient, not persisted
WeakPassword bool `mapstructure:"-"` // transient, not persisted
}
type StorageConfig ¶
type TelemetryConfig ¶ added in v0.8.2
// TelemetryConfig is the "telemetry" section of Config. Each field is decoded
// from the mapstructure key named in its tag.
type TelemetryConfig struct {
Enabled bool `mapstructure:"enabled"`
// NOTE(review): presumably an identifier for this installation; confirm where it is generated.
InstanceID string `mapstructure:"instance_id"`
// AskedAt is kept as a string rather than a time.Time.
AskedAt string `mapstructure:"asked_at"` // ISO timestamp when user was asked about telemetry
// SessionRecording is a separate switch from Enabled; see the warning on the field.
SessionRecording bool `mapstructure:"session_recording"` // WARNING: records full browser sessions — all page content, URLs, and clicks are sent to PostHog
}
Click to show internal directories.
Click to hide internal directories.