config

package
v0.10.0 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 7, 2026 License: AGPL-3.0 Imports: 12 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func ApplyResourceLimits

func ApplyResourceLimits(cfg *Config)

ApplyResourceLimits configures GOMAXPROCS and the memory soft limit based on cfg. It also auto-adjusts the worker count if memory is constrained.

func DefaultDataDir added in v0.8.0

func DefaultDataDir() (string, error)

DefaultDataDir returns the platform-specific application data directory: on macOS, ~/Library/Application Support/CrawlObserver; on Linux, ~/.local/share/crawlobserver; on Windows, %APPDATA%/CrawlObserver.

func SetDefaults

func SetDefaults()

func WriteConfig added in v0.8.3

func WriteConfig() error

WriteConfig writes the current viper config to disk, creating it if needed.

Types

type ClickHouseConfig

// ClickHouseConfig holds the ClickHouse settings decoded (via mapstructure)
// from the "clickhouse" key of Config.
//
// Host, Port, Database, Username and Password describe the connection.
// Mode, BinaryPath and DataDir each accept an empty string, which the field
// comments describe as auto-detect (Mode, BinaryPath) or the platform default
// (DataDir).
//
// Password is a credential: the package documents the DSN method as returning
// a redacted connection string that is safe for logging, so prefer DSN over
// printing this struct directly.
type ClickHouseConfig struct {
	Host       string `mapstructure:"host"`
	Port       int    `mapstructure:"port"`
	Database   string `mapstructure:"database"`
	Username   string `mapstructure:"username"`
	Password   string `mapstructure:"password"`
	Mode       string `mapstructure:"mode"`        // "managed" | "external" | "" (auto-detect)
	BinaryPath string `mapstructure:"binary_path"` // path to clickhouse binary, "" = auto-detect
	DataDir    string `mapstructure:"data_dir"`    // data directory, "" = platform default
}

func (ClickHouseConfig) DSN

func (c ClickHouseConfig) DSN() string

DSN returns a redacted connection string safe for logging.

type Config

// Config is the top-level configuration structure; Load returns a *Config.
//
// Each field is decoded from the configuration key named in its mapstructure
// tag. All fields except SetupComplete are nested sections with their own
// struct type; SetupComplete is a plain boolean stored under "setup_complete".
// NOTE(review): SetupComplete presumably records that first-run setup has
// finished — confirm against the code that sets it.
type Config struct {
	Crawler       CrawlerConfig    `mapstructure:"crawler"`
	ClickHouse    ClickHouseConfig `mapstructure:"clickhouse"`
	Storage       StorageConfig    `mapstructure:"storage"`
	Resources     ResourcesConfig  `mapstructure:"resources"`
	Server        ServerConfig     `mapstructure:"server"`
	Theme         ThemeConfig      `mapstructure:"theme"`
	GSC           GSCConfig        `mapstructure:"gsc"`
	Telemetry     TelemetryConfig  `mapstructure:"telemetry"`
	SetupComplete bool             `mapstructure:"setup_complete"`
}

func Load

func Load() (*Config, error)

type CrawlerConfig

// CrawlerConfig holds the crawler settings decoded from the "crawler" key of
// Config. Retry and JSRender are nested sections stored under the "retry" and
// "js_render" keys.
//
// Delay and Timeout are time.Duration values and MaxBodySize is an int64.
// NOTE(review): MaxBodySize is presumably a byte count — confirm.
//
// NOTE(review): the "0 = N" comments on MaxConcurrentSessions,
// MaxFrontierSize and MaxWorkers presumably mean that a zero value selects
// the built-in default N — confirm against the code that reads these fields.
//
// NOTE(review): the package documents ApplyResourceLimits as auto-adjusting
// the worker count when memory is constrained; presumably that refers to
// Workers — confirm.
type CrawlerConfig struct {
	Workers               int            `mapstructure:"workers"`
	Delay                 time.Duration  `mapstructure:"delay"`
	MaxPages              int            `mapstructure:"max_pages"`
	MaxDepth              int            `mapstructure:"max_depth"`
	Timeout               time.Duration  `mapstructure:"timeout"`
	UserAgent             string         `mapstructure:"user_agent"`
	MaxBodySize           int64          `mapstructure:"max_body_size"`
	RespectRobots         bool           `mapstructure:"respect_robots"`
	StoreHTML             bool           `mapstructure:"store_html"`
	CrawlScope            string         `mapstructure:"crawl_scope"`             // "host" (default), "domain" (eTLD+1), or "subdirectory"
	AllowPrivateIPs       bool           `mapstructure:"allow_private_ips"`       // allow crawling private/reserved IPs (default: false)
	TLSProfile            string         `mapstructure:"tls_profile"`             // "", "chrome", "firefox", "edge"
	SourceIP              string         `mapstructure:"source_ip"`               // local IP to bind outgoing connections
	ForceIPv4             bool           `mapstructure:"force_ipv4"`              // force IPv4-only DNS and connections
	MaxConcurrentSessions int            `mapstructure:"max_concurrent_sessions"` // 0 = 20
	MaxFrontierSize       int            `mapstructure:"max_frontier_size"`       // 0 = 5_000_000
	MaxWorkers            int            `mapstructure:"max_workers"`             // 0 = 100
	Retry                 RetryConfig    `mapstructure:"retry"`
	JSRender              JSRenderConfig `mapstructure:"js_render"`
}

type GSCConfig

// GSCConfig holds a client ID, client secret and redirect URI decoded from
// the "gsc" key of Config.
//
// NOTE(review): GSC presumably stands for Google Search Console and these
// look like OAuth client credentials — confirm. ClientSecret is a secret and
// should not be logged.
type GSCConfig struct {
	ClientID     string `mapstructure:"client_id"`
	ClientSecret string `mapstructure:"client_secret"`
	RedirectURI  string `mapstructure:"redirect_uri"`
}

type JSRenderConfig

// JSRenderConfig holds the JavaScript-rendering settings. It is embedded in
// CrawlerConfig as the JSRender field, under the "js_render" key.
//
// The defaults quoted in the field comments are documentation only; nothing
// in this declaration applies them.
// NOTE(review): presumably SetDefaults or the consumer sets them — confirm.
type JSRenderConfig struct {
	Mode           string        `mapstructure:"mode"`            // "off" (default), "auto", "always"
	MaxPages       int           `mapstructure:"max_pages"`       // concurrent Chrome pages (default: 4)
	PageTimeout    time.Duration `mapstructure:"page_timeout"`    // per-page timeout (default: 15s)
	BlockResources bool          `mapstructure:"block_resources"` // block images/fonts (default: true)
}

type RateLimitConfig

// RateLimitConfig holds rate-limiting settings. It is embedded in
// ServerConfig as the RateLimit field, under the "rate_limit" key.
//
// Note that AuthRequestsPerMin is stored under the longer key
// "auth_requests_per_minute".
// NOTE(review): presumably RequestsPerSecond and Burst configure a
// token-bucket style limiter and AuthRequestsPerMin is a separate per-minute
// cap on authentication requests — confirm against the server code.
type RateLimitConfig struct {
	Enabled            bool    `mapstructure:"enabled"`
	RequestsPerSecond  float64 `mapstructure:"requests_per_second"`
	Burst              int     `mapstructure:"burst"`
	AuthRequestsPerMin int     `mapstructure:"auth_requests_per_minute"`
}

type ResourcesConfig

// ResourcesConfig holds the process resource limits decoded from the
// "resources" key of Config. The package documents ApplyResourceLimits as
// configuring GOMAXPROCS and the memory soft limit based on config.
//
// For both fields a zero value means "automatic", as described in the field
// comments.
type ResourcesConfig struct {
	MaxMemoryMB int `mapstructure:"max_memory_mb"` // soft limit, 0 = auto (75% of system RAM)
	MaxCPU      int `mapstructure:"max_cpu"`       // GOMAXPROCS, 0 = all available
}

type RetryConfig

// RetryConfig holds retry settings. It is embedded in CrawlerConfig as the
// Retry field, under the "retry" key.
//
// BaseDelay and MaxDelay are time.Duration values.
// NOTE(review): presumably they bound a backoff between MaxRetries attempts,
// and MaxConsecutiveFails / MaxGlobalErrorRate are thresholds for giving up —
// confirm against the crawler code. The scale of MaxGlobalErrorRate
// (fraction 0-1 vs. percentage) is not visible here — confirm.
type RetryConfig struct {
	MaxRetries          int           `mapstructure:"max_retries"`
	BaseDelay           time.Duration `mapstructure:"base_delay"`
	MaxDelay            time.Duration `mapstructure:"max_delay"`
	MaxConsecutiveFails int           `mapstructure:"max_consecutive_fails"`
	MaxGlobalErrorRate  float64       `mapstructure:"max_global_error_rate"`
}

type ServerConfig

// ServerConfig holds the server settings decoded from the "server" key of
// Config, including the nested RateLimit section under "rate_limit".
//
// PasswordGenerated and WeakPassword carry the mapstructure tag "-", so they
// are not decoded from or written to the configuration; the field comments
// mark them as transient runtime flags.
// NOTE(review): presumably they are set while loading to report that a
// password was auto-generated or judged weak — confirm against Load.
//
// Password is a credential and should not be logged.
type ServerConfig struct {
	Host              string          `mapstructure:"host"`
	Port              int             `mapstructure:"port"`
	Username          string          `mapstructure:"username"`
	Password          string          `mapstructure:"password"`
	SQLitePath        string          `mapstructure:"sqlite_path"`
	RateLimit         RateLimitConfig `mapstructure:"rate_limit"`
	PasswordGenerated bool            `mapstructure:"-"` // transient, not persisted
	WeakPassword      bool            `mapstructure:"-"` // transient, not persisted
}

type StorageConfig

// StorageConfig holds the storage settings decoded from the "storage" key of
// Config. FlushInterval is a time.Duration.
//
// NOTE(review): presumably writes are buffered and flushed once BatchSize
// items accumulate or FlushInterval elapses — confirm against the storage
// writer.
type StorageConfig struct {
	BatchSize     int           `mapstructure:"batch_size"`
	FlushInterval time.Duration `mapstructure:"flush_interval"`
}

type TelemetryConfig added in v0.8.2

// TelemetryConfig holds the telemetry settings decoded from the "telemetry"
// key of Config.
//
// AskedAt is kept as a string (an ISO timestamp per its field comment), not
// a time.Time. SessionRecording is privacy-sensitive: see the warning on the
// field before enabling it.
type TelemetryConfig struct {
	Enabled          bool   `mapstructure:"enabled"`
	InstanceID       string `mapstructure:"instance_id"`
	AskedAt          string `mapstructure:"asked_at"`          // ISO timestamp when user was asked about telemetry
	SessionRecording bool   `mapstructure:"session_recording"` // WARNING: records full browser sessions — all page content, URLs, and clicks are sent to PostHog
}

type ThemeConfig

// ThemeConfig holds the theme settings decoded from the "theme" key of
// Config.
//
// Unlike the other configuration structs shown here, its fields also carry
// json tags with the same snake_case names as their mapstructure keys.
// NOTE(review): presumably because the theme is served to the front end as
// JSON — confirm.
type ThemeConfig struct {
	AppName     string `mapstructure:"app_name" json:"app_name"`
	LogoURL     string `mapstructure:"logo_url" json:"logo_url"`
	AccentColor string `mapstructure:"accent_color" json:"accent_color"`
	Mode        string `mapstructure:"mode" json:"mode"` // "light" or "dark"
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL