Documentation
¶
Overview ¶
Package config provides configuration management for the Hapax LLM server. It includes support for various LLM providers, token validation, caching, and runtime behavior customization.
config_watcher.go
Index ¶
- type BackupProvider
- type CacheConfig
- type CircuitBreakerConfig
- type Config
- type ConfigWatcher
- type HTTP3Config
- type HealthCheck
- type LLMConfig
- type LoggingConfig
- type ProcessingConfig
- type ProviderConfig
- type ProviderHealthCheck
- type QueueConfig
- type RedisCacheConfig
- type ResponseFormattingConfig
- type RetryConfig
- type RouteConfig
- type ServerConfig
- type Watcher
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type BackupProvider ¶ added in v0.0.8
type BackupProvider struct {
// Provider names the fallback LLM provider.
Provider string `yaml:"provider"`
// Model names the model to use with the fallback provider.
Model string `yaml:"model"`
// APIKey is the API key for the fallback provider.
// NOTE(review): presumably accepts the same ${ENV_VAR} form described on
// LLMConfig.APIKey — confirm.
APIKey string `yaml:"api_key"`
}
BackupProvider defines a fallback LLM provider
type CacheConfig ¶ added in v0.0.5
type CacheConfig struct {
// Enable turns caching on/off (default: false)
Enable bool `yaml:"enable"`
// Type specifies the caching strategy:
// - "memory": In-memory cache (cleared on restart)
// - "redis": Redis-based persistent cache
// - "file": File-based persistent cache
Type string `yaml:"type"`
// TTL specifies how long to keep cached responses (default: 24h)
TTL time.Duration `yaml:"ttl"`
// MaxSize limits the cache size:
// - For memory cache: maximum number of entries
// - For file cache: maximum total size in bytes
// NOTE(review): the meaning for the "redis" type is not stated — confirm.
MaxSize int64 `yaml:"max_size"`
// Dir specifies the directory for file-based cache; omitted from YAML
// output when empty.
Dir string `yaml:"dir,omitempty"`
// Redis holds the Redis connection settings (only used if Type is "redis");
// omitted from YAML output when nil.
Redis *RedisCacheConfig `yaml:"redis,omitempty"`
}
CacheConfig defines caching behavior for LLM responses. Caching can significantly improve performance and reduce API costs by storing and reusing responses for identical prompts.
type CircuitBreakerConfig ¶ added in v0.0.11
// CircuitBreakerConfig holds the circuit breaker settings: the half-open
// request limit, the closed-state interval, the open-state timeout, and the
// failure threshold that trips the circuit.
type CircuitBreakerConfig struct {
// MaxRequests is the maximum number of requests allowed to pass through
// when in half-open state
MaxRequests uint32 `yaml:"max_requests"`
// Interval is the cyclic period of the closed state for the circuit breaker
Interval time.Duration `yaml:"interval"`
// Timeout is the period of the open state until it becomes half-open
Timeout time.Duration `yaml:"timeout"`
// FailureThreshold is the number of failures needed to trip the circuit
// NOTE(review): whether failures must be consecutive is not stated — confirm.
FailureThreshold uint32 `yaml:"failure_threshold"`
// TestMode indicates whether to skip Prometheus metric registration (for testing)
TestMode bool `yaml:"test_mode"`
}
type Config ¶
type Config struct {
// Server holds the HTTP server settings.
Server ServerConfig `yaml:"server"`
// LLM holds the LLM-specific settings.
LLM LLMConfig `yaml:"llm"`
// Logging holds the logging settings.
Logging LoggingConfig `yaml:"logging"`
// Routes lists the route definitions.
Routes []RouteConfig `yaml:"routes"`
// Providers maps a string key to an LLM provider configuration.
// NOTE(review): the key is presumably the provider name referenced by
// ProviderPreference — confirm.
Providers map[string]ProviderConfig `yaml:"providers"`
// ProviderPreference lists providers in order of preference.
ProviderPreference []string `yaml:"provider_preference"`
// CircuitBreaker holds the circuit breaker settings.
CircuitBreaker CircuitBreakerConfig `yaml:"circuit_breaker"`
// Queue holds the request queue middleware settings.
Queue QueueConfig `yaml:"queue"`
// TestMode skips provider initialization in tests. The "-" tag keeps it
// out of YAML, so it can only be set from code.
TestMode bool `yaml:"-"`
}
Config represents the complete server configuration. It combines server settings, LLM configuration, logging preferences, route definitions, provider definitions with their preference order, circuit breaker settings, and queue settings into a single configuration structure.
func DefaultConfig ¶
func DefaultConfig() *Config
DefaultConfig returns a configuration that aligns with the existing validation requirements while keeping the implementation simple and focused on memory caching.
type ConfigWatcher ¶ added in v0.0.16
type ConfigWatcher struct {
// contains filtered or unexported fields
}
ConfigWatcher manages configuration hot reloading
func NewConfigWatcher ¶ added in v0.0.16
func NewConfigWatcher(configPath string, logger *zap.Logger) (*ConfigWatcher, error)
NewConfigWatcher creates a new configuration watcher
func (*ConfigWatcher) Close ¶ added in v0.0.16
func (cw *ConfigWatcher) Close() error
func (*ConfigWatcher) GetCurrentConfig ¶ added in v0.0.16
func (cw *ConfigWatcher) GetCurrentConfig() *Config
GetCurrentConfig returns the current configuration in a thread-safe manner.
func (*ConfigWatcher) Subscribe ¶ added in v0.0.16
func (cw *ConfigWatcher) Subscribe() <-chan *Config
Subscribe allows components to receive config updates
type HTTP3Config ¶ added in v0.0.24
type HTTP3Config struct {
// Enabled turns HTTP/3 support on or off
Enabled bool `yaml:"enabled"`
// Port is the port for HTTP/3 (QUIC) traffic (default: 443)
Port int `yaml:"port"`
// TLSCertFile is the path to the TLS certificate file
TLSCertFile string `yaml:"tls_cert_file"`
// TLSKeyFile is the path to the TLS private key file
TLSKeyFile string `yaml:"tls_key_file"`
// IdleTimeout is the maximum time to wait for the next request when keep-alives are enabled
IdleTimeout time.Duration `yaml:"idle_timeout"`
// MaxBiStreamsConcurrent is the maximum number of concurrent bidirectional streams
// that a peer is allowed to open. The default is 100.
MaxBiStreamsConcurrent int64 `yaml:"max_bi_streams_concurrent"`
// MaxUniStreamsConcurrent is the maximum number of concurrent unidirectional streams
// that a peer is allowed to open. The default is 100.
MaxUniStreamsConcurrent int64 `yaml:"max_uni_streams_concurrent"`
// MaxStreamReceiveWindow is the stream-level flow control window for receiving data
// NOTE(review): presumably expressed in bytes — confirm.
MaxStreamReceiveWindow uint64 `yaml:"max_stream_receive_window"`
// MaxConnectionReceiveWindow is the connection-level flow control window for receiving data
// NOTE(review): presumably expressed in bytes — confirm.
MaxConnectionReceiveWindow uint64 `yaml:"max_connection_receive_window"`
}
HTTP3Config holds configuration specific to the HTTP/3 server. HTTP/3 requires TLS, so certificate configuration is mandatory.
type HealthCheck ¶ added in v0.0.6
type HealthCheck struct {
// Enabled specifies whether health checks are enabled for this route
Enabled bool `yaml:"enabled"`
// Interval specifies the interval between health checks
Interval time.Duration `yaml:"interval"`
// Timeout specifies the timeout for health checks
Timeout time.Duration `yaml:"timeout"`
// Threshold specifies the number of failures before marking the route as unhealthy
// NOTE(review): whether the failures must be consecutive is not stated — confirm.
Threshold int `yaml:"threshold"`
// Checks maps each check name to its check type
// NOTE(review): the set of valid check types is not listed here — confirm.
Checks map[string]string `yaml:"checks"`
}
HealthCheck defines health check configuration for a route
type LLMConfig ¶
type LLMConfig struct {
// Provider specifies the LLM provider (e.g., "openai", "anthropic", "ollama")
Provider string `yaml:"provider"`
// Model is the name of the model to use (e.g., "gpt-4", "claude-3-haiku")
Model string `yaml:"model"`
// APIKey is the authentication key for the provider's API
// Use environment variables (e.g., ${OPENAI_API_KEY}) for secure configuration
APIKey string `yaml:"api_key"`
// Endpoint is the API endpoint URL
// For Ollama, this is typically "http://localhost:11434"
Endpoint string `yaml:"endpoint"`
// SystemPrompt is the default system prompt to use
SystemPrompt string `yaml:"system_prompt"`
// MaxContextTokens is the maximum number of tokens in the context window
// This varies by model:
// - GPT-4: 8192 or 32768
// - Claude: 100k
// - Llama2: Varies by version
MaxContextTokens int `yaml:"max_context_tokens"`
// Cache holds the response cache settings (optional)
Cache *CacheConfig `yaml:"cache,omitempty"`
// Retry holds the retry settings for failed API calls (optional)
Retry *RetryConfig `yaml:"retry,omitempty"`
// Options contains provider-specific generation parameters
Options map[string]interface{} `yaml:"options"`
// BackupProviders defines failover providers (optional)
BackupProviders []BackupProvider `yaml:"backup_providers,omitempty"`
// HealthCheck defines provider health monitoring settings (optional)
HealthCheck *ProviderHealthCheck `yaml:"health_check,omitempty"`
}
LLMConfig holds LLM-specific configuration. It supports multiple providers (OpenAI, Anthropic, Ollama) and includes settings for token validation, caching, retries, generation parameters, backup (failover) providers, and provider health checks.
type LoggingConfig ¶
type LoggingConfig struct {
// Level sets the logging verbosity: debug, info, warn, or error
Level string `yaml:"level"`
// Format specifies the log output format: json or text
Format string `yaml:"format"`
}
LoggingConfig holds logging-specific configuration.
type ProcessingConfig ¶ added in v0.0.7
type ProcessingConfig struct {
// RequestTemplates maps template names to their content
// NOTE(review): the template syntax is not specified here — confirm.
RequestTemplates map[string]string `yaml:"request_templates"`
// ResponseFormatting configures how responses should be formatted
ResponseFormatting ResponseFormattingConfig `yaml:"response_formatting"`
}
ProcessingConfig defines the configuration for request/response processing
type ProviderConfig ¶ added in v0.0.11
type ProviderConfig struct {
// Type is the provider type (e.g., openai, anthropic)
Type string `yaml:"type"`
// Model is the model name
Model string `yaml:"model"`
// APIKey is the API key used for authentication
APIKey string `yaml:"api_key"`
}
ProviderConfig holds configuration for an LLM provider
type ProviderHealthCheck ¶ added in v0.0.8
type ProviderHealthCheck struct {
// Enabled specifies whether provider health checks are enabled.
Enabled bool `yaml:"enabled"`
// Interval is presumably the time between health checks, mirroring
// HealthCheck.Interval — TODO confirm.
Interval time.Duration `yaml:"interval"`
// Timeout is presumably the time limit for a single health check,
// mirroring HealthCheck.Timeout — TODO confirm.
Timeout time.Duration `yaml:"timeout"`
// FailureThreshold is presumably the number of failed checks after which
// the provider is treated as unhealthy — TODO confirm.
FailureThreshold int `yaml:"failure_threshold"`
}
ProviderHealthCheck defines health check settings for monitoring an LLM provider.
type QueueConfig ¶ added in v0.0.22
type QueueConfig struct {
// Enabled determines if the queue middleware is active
Enabled bool `yaml:"enabled"`
// InitialSize is the starting maximum size of the queue
InitialSize int64 `yaml:"initial_size"`
// StatePath is the file path where queue state is persisted
// If empty, persistence is disabled
StatePath string `yaml:"state_path"`
// SaveInterval is how often the queue state is saved
// If 0, periodic saving is disabled
// NOTE(review): presumably has no effect when StatePath is empty — confirm.
SaveInterval time.Duration `yaml:"save_interval"`
}
QueueConfig defines the configuration for the request queue middleware. It controls queue size, persistence, and state management.
type RedisCacheConfig ¶ added in v0.0.5
type RedisCacheConfig struct {
// Address is the Redis server address (e.g., "localhost:6379")
Address string `yaml:"address"`
// Password is the password for Redis authentication (optional)
Password string `yaml:"password"`
// DB is the Redis database number to use
DB int `yaml:"db"`
}
RedisCacheConfig holds Redis-specific cache configuration.
type ResponseFormattingConfig ¶ added in v0.0.7
type ResponseFormattingConfig struct {
// CleanJSON enables JSON response cleaning using gollm
CleanJSON bool `yaml:"clean_json"`
// TrimWhitespace removes extra whitespace from responses
TrimWhitespace bool `yaml:"trim_whitespace"`
// MaxLength limits the response length
// NOTE(review): the unit (bytes, characters, or tokens) and the meaning of
// 0 are not stated — confirm.
MaxLength int `yaml:"max_length"`
}
ResponseFormattingConfig defines response formatting options
type RetryConfig ¶ added in v0.0.5
type RetryConfig struct {
// MaxRetries is the maximum number of retry attempts (default: 3)
MaxRetries int `yaml:"max_retries"`
// InitialDelay is the delay before the first retry (default: 1s)
InitialDelay time.Duration `yaml:"initial_delay"`
// MaxDelay caps the maximum delay between retries (default: 30s)
MaxDelay time.Duration `yaml:"max_delay"`
// Multiplier increases the delay after each retry (default: 2)
// The delay pattern will be: initial_delay * (multiplier ^ retry_count),
// with MaxDelay as the upper bound on any single delay.
Multiplier float64 `yaml:"multiplier"`
// RetryableErrors specifies which error types should trigger retries
// Common values: "rate_limit", "timeout", "server_error"
RetryableErrors []string `yaml:"retryable_errors"`
}
RetryConfig defines the retry behavior for failed API calls. This helps handle transient errors and rate limiting gracefully.
type RouteConfig ¶
type RouteConfig struct {
// Path is the URL path to match
Path string `yaml:"path"`
// Handler specifies which handler to use for this route
Handler string `yaml:"handler"`
// Version specifies the API version (e.g., "v1", "v2")
Version string `yaml:"version"`
// Methods specifies the allowed HTTP methods for this route
Methods []string `yaml:"methods"`
// Headers specifies the required headers for this route
// NOTE(review): presumably header name -> required value — confirm.
Headers map[string]string `yaml:"headers,omitempty"`
// Middleware specifies the route-specific middleware
// NOTE(review): presumably middleware names; whether order matters is not
// stated — confirm.
Middleware []string `yaml:"middleware,omitempty"`
// HealthCheck specifies the health check configuration for this route
HealthCheck *HealthCheck `yaml:"health_check,omitempty"`
}
RouteConfig holds route-specific configuration.
type ServerConfig ¶
type ServerConfig struct {
// Port specifies the HTTP server port (default: 8080)
Port int `yaml:"port"`
// ReadTimeout is the maximum duration for reading the entire request,
// including the body (default: 30s)
ReadTimeout time.Duration `yaml:"read_timeout"`
// WriteTimeout is the maximum duration before timing out writes of the response
// (default: 30s)
WriteTimeout time.Duration `yaml:"write_timeout"`
// MaxHeaderBytes controls the maximum number of bytes the server will
// read parsing the request header's keys and values (default: 1MB)
MaxHeaderBytes int `yaml:"max_header_bytes"`
// ShutdownTimeout specifies how long to wait for the server to shutdown
// gracefully before forcing termination (default: 30s)
ShutdownTimeout time.Duration `yaml:"shutdown_timeout"`
// HTTP3 holds the HTTP/3 server settings (optional); omitted from YAML
// output when nil.
HTTP3 *HTTP3Config `yaml:"http3,omitempty"`
}
ServerConfig holds server-specific configuration for the HTTP server. It defines timeouts, limits, and operational parameters.