config

package
v0.8.1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 5, 2026 License: AGPL-3.0 Imports: 11 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func ApplyResourceLimits

func ApplyResourceLimits(cfg *Config)

ApplyResourceLimits configures GOMAXPROCS and the memory soft limit based on cfg. It also auto-adjusts the worker count if memory is constrained.

func DefaultDataDir added in v0.8.0

func DefaultDataDir() (string, error)

DefaultDataDir returns the platform-specific application data directory: on macOS, ~/Library/Application Support/CrawlObserver; on Linux, ~/.local/share/crawlobserver; on Windows, %APPDATA%/CrawlObserver.

func SetDefaults

func SetDefaults()

Types

type ClickHouseConfig

// ClickHouseConfig holds the ClickHouse settings: the connection endpoint
// (Host, Port), the target Database, the credentials (Username, Password),
// and the options describing how the ClickHouse process is located and run
// (Mode, BinaryPath, DataDir).
//
// Each field is mapped from the configuration key named in its mapstructure
// tag. Password is a credential; the DSN method on this type is documented as
// returning a redacted connection string that is safe for logging.
type ClickHouseConfig struct {
	Host       string `mapstructure:"host"`
	Port       int    `mapstructure:"port"`
	Database   string `mapstructure:"database"`
	Username   string `mapstructure:"username"`
	Password   string `mapstructure:"password"`
	Mode       string `mapstructure:"mode"`        // "managed" | "external" | "" (auto-detect)
	BinaryPath string `mapstructure:"binary_path"` // path to clickhouse binary, "" = auto-detect
	DataDir    string `mapstructure:"data_dir"`    // data directory, "" = platform default
}

func (ClickHouseConfig) DSN

func (c ClickHouseConfig) DSN() string

DSN returns a redacted connection string safe for logging.

type Config

// Config is the top-level configuration. It groups one sub-configuration per
// section: crawler, clickhouse, storage, resources, server, theme and gsc.
//
// Each field is mapped from the configuration key named in its mapstructure
// tag. Load returns a *Config, and ApplyResourceLimits takes one as input.
type Config struct {
	Crawler    CrawlerConfig    `mapstructure:"crawler"`
	ClickHouse ClickHouseConfig `mapstructure:"clickhouse"`
	Storage    StorageConfig    `mapstructure:"storage"`
	Resources  ResourcesConfig  `mapstructure:"resources"`
	Server     ServerConfig     `mapstructure:"server"`
	Theme      ThemeConfig      `mapstructure:"theme"`
	GSC        GSCConfig        `mapstructure:"gsc"`
}

func Load

func Load() (*Config, error)

type CrawlerConfig

// CrawlerConfig holds the crawler settings: concurrency (Workers, MaxWorkers,
// MaxConcurrentSessions), pacing (Delay, Timeout), crawl bounds (MaxPages,
// MaxDepth, MaxFrontierSize, MaxBodySize, CrawlScope), request identity and
// networking (UserAgent, TLSProfile, SourceIP, ForceIPv4, AllowPrivateIPs),
// content handling (RespectRobots, StoreHTML), and the nested Retry and
// JSRender sections.
//
// Each field is mapped from the configuration key named in its mapstructure
// tag. Where a field comment says "0 = N", a zero value stands for the stated
// default N.
//
// NOTE(review): the unit of MaxBodySize is not visible here (presumably
// bytes) — confirm against the code that enforces it.
type CrawlerConfig struct {
	Workers               int            `mapstructure:"workers"`
	Delay                 time.Duration  `mapstructure:"delay"`
	MaxPages              int            `mapstructure:"max_pages"`
	MaxDepth              int            `mapstructure:"max_depth"`
	Timeout               time.Duration  `mapstructure:"timeout"`
	UserAgent             string         `mapstructure:"user_agent"`
	MaxBodySize           int64          `mapstructure:"max_body_size"`
	RespectRobots         bool           `mapstructure:"respect_robots"`
	StoreHTML             bool           `mapstructure:"store_html"`
	CrawlScope            string         `mapstructure:"crawl_scope"`             // "host" (default), "domain" (eTLD+1), or "subdirectory"
	AllowPrivateIPs       bool           `mapstructure:"allow_private_ips"`       // allow crawling private/reserved IPs (default: false)
	TLSProfile            string         `mapstructure:"tls_profile"`             // "", "chrome", "firefox", "edge"
	SourceIP              string         `mapstructure:"source_ip"`               // local IP to bind outgoing connections
	ForceIPv4             bool           `mapstructure:"force_ipv4"`              // force IPv4-only DNS and connections
	MaxConcurrentSessions int            `mapstructure:"max_concurrent_sessions"` // 0 = 20
	MaxFrontierSize       int            `mapstructure:"max_frontier_size"`       // 0 = 5_000_000
	MaxWorkers            int            `mapstructure:"max_workers"`             // 0 = 100
	Retry                 RetryConfig    `mapstructure:"retry"`
	JSRender              JSRenderConfig `mapstructure:"js_render"`
}

type GSCConfig

// GSCConfig holds a client ID, a client secret and a redirect URI for the
// "gsc" configuration section. Each field is mapped from the configuration
// key named in its mapstructure tag.
//
// NOTE(review): the field names suggest OAuth client credentials, and "GSC"
// presumably stands for Google Search Console — confirm against the code that
// consumes this section. ClientSecret is a credential and should not be logged.
type GSCConfig struct {
	ClientID     string `mapstructure:"client_id"`
	ClientSecret string `mapstructure:"client_secret"`
	RedirectURI  string `mapstructure:"redirect_uri"`
}

type JSRenderConfig

// JSRenderConfig holds the JavaScript-rendering settings nested under the
// crawler section ("js_render"): the rendering mode, the number of concurrent
// Chrome pages, the per-page timeout, and whether images/fonts are blocked.
//
// Each field is mapped from the configuration key named in its mapstructure
// tag; the defaults quoted in the field comments are applied elsewhere.
type JSRenderConfig struct {
	Mode           string        `mapstructure:"mode"`            // "off" (default), "auto", "always"
	MaxPages       int           `mapstructure:"max_pages"`       // concurrent Chrome pages (default: 4)
	PageTimeout    time.Duration `mapstructure:"page_timeout"`    // per-page timeout (default: 15s)
	BlockResources bool          `mapstructure:"block_resources"` // block images/fonts (default: true)
}

type RateLimitConfig

// RateLimitConfig holds the rate-limit settings nested under the server
// section ("rate_limit"): an on/off switch, a requests-per-second rate, a
// burst size, and a separate per-minute figure for auth requests.
//
// Each field is mapped from the configuration key named in its mapstructure
// tag.
//
// NOTE(review): how Burst interacts with RequestsPerSecond, and which
// endpoints count as "auth" requests, is not visible here — confirm against
// the limiter implementation.
type RateLimitConfig struct {
	Enabled            bool    `mapstructure:"enabled"`
	RequestsPerSecond  float64 `mapstructure:"requests_per_second"`
	Burst              int     `mapstructure:"burst"`
	AuthRequestsPerMin int     `mapstructure:"auth_requests_per_minute"`
}

type ResourcesConfig

// ResourcesConfig holds the process resource limits for the "resources"
// section: a memory soft limit in megabytes and a CPU count used for
// GOMAXPROCS. A zero value in either field selects the automatic behaviour
// described in the field comment.
//
// Each field is mapped from the configuration key named in its mapstructure
// tag. ApplyResourceLimits is documented as configuring GOMAXPROCS and the
// memory soft limit based on the config.
type ResourcesConfig struct {
	MaxMemoryMB int `mapstructure:"max_memory_mb"` // soft limit, 0 = auto (75% of system RAM)
	MaxCPU      int `mapstructure:"max_cpu"`       // GOMAXPROCS, 0 = all available
}

type RetryConfig

// RetryConfig holds the retry settings nested under the crawler section
// ("retry"): a maximum retry count, a base and a maximum delay, a limit on
// consecutive failures, and a limit on the global error rate.
//
// Each field is mapped from the configuration key named in its mapstructure
// tag.
//
// NOTE(review): the backoff formula between BaseDelay and MaxDelay, the scale
// of MaxGlobalErrorRate (fraction vs. percent), and what happens when a limit
// is reached are not visible here — confirm against the retry implementation.
type RetryConfig struct {
	MaxRetries          int           `mapstructure:"max_retries"`
	BaseDelay           time.Duration `mapstructure:"base_delay"`
	MaxDelay            time.Duration `mapstructure:"max_delay"`
	MaxConsecutiveFails int           `mapstructure:"max_consecutive_fails"`
	MaxGlobalErrorRate  float64       `mapstructure:"max_global_error_rate"`
}

type ServerConfig

// ServerConfig holds the settings for the "server" section: a host and port,
// a username and password, a SQLite file path, and the nested rate-limit
// settings.
//
// Each field is mapped from the configuration key named in its mapstructure
// tag. Password is a credential and should not be logged.
//
// NOTE(review): whether Host/Port form the listen address, and what the
// SQLite database stores, is not visible here — confirm against the server
// code.
type ServerConfig struct {
	Host       string          `mapstructure:"host"`
	Port       int             `mapstructure:"port"`
	Username   string          `mapstructure:"username"`
	Password   string          `mapstructure:"password"`
	SQLitePath string          `mapstructure:"sqlite_path"`
	RateLimit  RateLimitConfig `mapstructure:"rate_limit"`
}

type StorageConfig

// StorageConfig holds the settings for the "storage" section: a batch size
// and a flush interval. Each field is mapped from the configuration key named
// in its mapstructure tag.
//
// NOTE(review): presumably writes are buffered until BatchSize items are
// queued or FlushInterval elapses — confirm against the storage writer.
type StorageConfig struct {
	BatchSize     int           `mapstructure:"batch_size"`
	FlushInterval time.Duration `mapstructure:"flush_interval"`
}

type ThemeConfig

// ThemeConfig holds the settings for the "theme" section: the application
// name, a logo URL, an accent color and the light/dark mode.
//
// Unlike the other configuration structs shown here, every field carries a
// json tag in addition to its mapstructure tag, using the same key name in
// both.
//
// NOTE(review): the expected format of AccentColor (e.g. a CSS hex value) is
// not visible here — confirm against the consumer.
type ThemeConfig struct {
	AppName     string `mapstructure:"app_name" json:"app_name"`
	LogoURL     string `mapstructure:"logo_url" json:"logo_url"`
	AccentColor string `mapstructure:"accent_color" json:"accent_color"`
	Mode        string `mapstructure:"mode" json:"mode"` // "light" or "dark"
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL