Documentation
¶
Index ¶
- type HealthMonitor
- func (hm *HealthMonitor) GetAllHealth() map[string]*ServiceHealth
- func (hm *HealthMonitor) GetHealth(serviceID string) *ServiceHealth
- func (hm *HealthMonitor) IsHealthy(serviceID string) bool
- func (hm *HealthMonitor) RemoveHealth(serviceID string)
- func (hm *HealthMonitor) ReportAuthError(serviceID string, statusCode int)
- func (hm *HealthMonitor) ReportError(serviceID string, err error)
- func (hm *HealthMonitor) ReportRateLimit(serviceID string)
- func (hm *HealthMonitor) ReportSuccess(serviceID string)
- func (hm *HealthMonitor) ResetHealth(serviceID string)
- func (hm *HealthMonitor) SetProbeFunc(fn HealthProbeFunc)
- func (hm *HealthMonitor) UpdateConfig(config HealthMonitorConfig)
- type HealthMonitorConfig
- type HealthProbeFunc
- type HealthStatus
- type Service
- type ServiceHealth
- type ServiceID
- type ServiceStats
- func (ss *ServiceStats) GetCacheStats() (hitRate float64, hits int64, misses int64)
- func (ss *ServiceStats) GetCostMetrics() int64
- func (ss *ServiceStats) GetLatencyStats() (avg, p50, p95, p99 float64, sampleCount int)
- func (ss *ServiceStats) GetStats() ServiceStats
- func (ss *ServiceStats) GetTTFTStats() (avg, p50, p95, p99 float64, sampleCount int)
- func (ss *ServiceStats) GetTokenSpeedStats() (avgSpeed float64, sampleCount int)
- func (ss *ServiceStats) GetWindowStats() (requestCount int64, tokensConsumed int64)
- func (ss *ServiceStats) GetWindowTokenDetails() (requestCount int64, inputTokens int64, outputTokens int64)
- func (ss *ServiceStats) IsWindowExpired() bool
- func (ss *ServiceStats) RecordCacheHit(isHit bool)
- func (ss *ServiceStats) RecordLatency(latencyMs int64, maxSamples int)
- func (ss *ServiceStats) RecordTTFT(ttftMs int64, maxSamples int)
- func (ss *ServiceStats) RecordTokenSpeed(speedTps float64, maxSamples int)
- func (ss *ServiceStats) RecordUsage(inputTokens, outputTokens int)
- func (ss *ServiceStats) ResetWindow()
- type TacticType
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type HealthMonitor ¶
type HealthMonitor struct {
// contains filtered or unexported fields
}
HealthMonitor manages health status for all services
func NewHealthMonitor ¶
func NewHealthMonitor(config HealthMonitorConfig) *HealthMonitor
NewHealthMonitor creates a new health monitor with the given configuration
func (*HealthMonitor) GetAllHealth ¶
func (hm *HealthMonitor) GetAllHealth() map[string]*ServiceHealth
GetAllHealth returns health status for all services
func (*HealthMonitor) GetHealth ¶
func (hm *HealthMonitor) GetHealth(serviceID string) *ServiceHealth
GetHealth returns the health status for a service
func (*HealthMonitor) IsHealthy ¶
func (hm *HealthMonitor) IsHealthy(serviceID string) bool
IsHealthy checks if a service is healthy (with time-based recovery and probing)
func (*HealthMonitor) RemoveHealth ¶
func (hm *HealthMonitor) RemoveHealth(serviceID string)
RemoveHealth removes a service's health record
func (*HealthMonitor) ReportAuthError ¶
func (hm *HealthMonitor) ReportAuthError(serviceID string, statusCode int)
ReportAuthError reports a 401/403 auth error for a service
func (*HealthMonitor) ReportError ¶
func (hm *HealthMonitor) ReportError(serviceID string, err error)
ReportError reports a retryable error for a service
func (*HealthMonitor) ReportRateLimit ¶
func (hm *HealthMonitor) ReportRateLimit(serviceID string)
ReportRateLimit reports a 429 rate limit error for a service
func (*HealthMonitor) ReportSuccess ¶
func (hm *HealthMonitor) ReportSuccess(serviceID string)
ReportSuccess reports a successful request for a service
func (*HealthMonitor) ResetHealth ¶
func (hm *HealthMonitor) ResetHealth(serviceID string)
ResetHealth manually resets a service's health to healthy
func (*HealthMonitor) SetProbeFunc ¶
func (hm *HealthMonitor) SetProbeFunc(fn HealthProbeFunc)
SetProbeFunc sets the probe function for health checking during recovery
func (*HealthMonitor) UpdateConfig ¶
func (hm *HealthMonitor) UpdateConfig(config HealthMonitorConfig)
UpdateConfig updates the health monitor configuration
type HealthMonitorConfig ¶
type HealthMonitorConfig struct {
// ConsecutiveErrorThreshold is the number of consecutive errors before marking unhealthy
ConsecutiveErrorThreshold int `json:"consecutive_error_threshold" yaml:"consecutive_error_threshold"`
// RecoveryTimeoutSeconds is the time in seconds before auto-recovery
RecoveryTimeoutSeconds int `json:"recovery_timeout_seconds" yaml:"recovery_timeout_seconds"`
// ProbeEnabled enables health check probing before marking service healthy
ProbeEnabled bool `json:"probe_enabled" yaml:"probe_enabled"`
}
HealthMonitorConfig holds configuration for health monitoring
func DefaultHealthMonitorConfig ¶
func DefaultHealthMonitorConfig() HealthMonitorConfig
DefaultHealthMonitorConfig returns default configuration
type HealthProbeFunc ¶
HealthProbeFunc is the function type for probing service health. It returns true if the service is healthy and false otherwise.
type HealthStatus ¶
type HealthStatus int
HealthStatus represents the health state of a service
const (
HealthHealthy HealthStatus = iota // Service is healthy and available
HealthUnhealthy // Service is unhealthy (rate limited, failing)
)
func (HealthStatus) String ¶
func (h HealthStatus) String() string
String returns the string representation of HealthStatus
type Service ¶
type Service struct {
Provider string `yaml:"provider" json:"provider"` // Provider name / uuid
Model string `yaml:"model" json:"model"` // Model name
Weight int `yaml:"weight" json:"weight"` // Weight for load balancing
Active bool `yaml:"active" json:"active"` // Whether this service is active
TimeWindow int `yaml:"time_window" json:"time_window"` // Statistics time window in seconds
ModelCapacity *int `yaml:"model_capacity,omitempty" json:"model_capacity,omitempty"` // ModelCapacity overrides the provider's default_model_capacity for this specific model
Stats ServiceStats `yaml:"-" json:"-"` // Service usage statistics (stored in SQLite, not in config)
}
Service represents a provider-model combination for load balancing
func (*Service) GetServiceID ¶ added in v0.260409.1540
GetServiceID returns the typed service identifier.
func (*Service) GetWindowStats ¶
GetWindowStats returns current window statistics for this service
func (*Service) InitializeStats ¶
func (s *Service) InitializeStats()
InitializeStats initializes the service statistics if they are empty
func (*Service) RecordUsage ¶
RecordUsage records usage for this service
type ServiceHealth ¶
type ServiceHealth struct {
ServiceID string // Unique service identifier (provider:model)
Status HealthStatus // Current health status
LastError error // Last error that caused unhealthy state
LastErrorTime time.Time // When the error occurred
ConsecutiveErrors int // Count of consecutive errors
RateLimited bool // True if last error was 429
AuthError bool // True if last error was 401/403
LastHealthCheck time.Time // Last time health was checked
RecoveryTimeout time.Duration // Time before auto-recovery
// contains filtered or unexported fields
}
ServiceHealth tracks health information for a single service
type ServiceID ¶ added in v0.260409.1540
ServiceID uniquely identifies a provider+model combination in load balancing.
func NewServiceID ¶ added in v0.260409.1540
NewServiceID creates a ServiceID from provider UUID/name and model.
type ServiceStats ¶
type ServiceStats struct {
ServiceID string `json:"service_id"` // Unique service identifier
RequestCount int64 `json:"request_count"` // Total request count
LastUsed time.Time `json:"last_used"` // Last usage timestamp
WindowStart time.Time `json:"window_start"` // Current time window start
WindowRequestCount int64 `json:"window_request_count"` // Requests in current window
WindowTokensConsumed int64 `json:"window_tokens_consumed"` // Tokens consumed in current window (input + output)
WindowInputTokens int64 `json:"window_input_tokens"` // Input tokens in current window
WindowOutputTokens int64 `json:"window_output_tokens"` // Output tokens in current window
TimeWindow int `json:"time_window"` // Copy of service's time window
// Latency tracking fields
LatencySamples []int64 `json:"-"` // Rolling window of latency samples (in ms)
AvgLatencyMs float64 `json:"avg_latency_ms"` // Average latency in current window
P50LatencyMs float64 `json:"p50_latency_ms"` // 50th percentile latency
P95LatencyMs float64 `json:"p95_latency_ms"` // 95th percentile latency
P99LatencyMs float64 `json:"p99_latency_ms"` // 99th percentile latency
LastLatencyUpdate time.Time `json:"last_latency_update"` // When latency was last updated
// Token speed tracking fields (tokens per second)
SpeedSamples []float64 `json:"-"` // Rolling window of token speed samples
AvgTokenSpeed float64 `json:"avg_token_speed"` // Average tokens per second
LastSpeedUpdate time.Time `json:"last_speed_update"` // When speed was last updated
// TTFT (Time To First Token) tracking fields
TTFTSamples []int64 `json:"-"` // Rolling window of TTFT samples (in ms)
AvgTTFTMs float64 `json:"avg_ttft_ms"` // Average TTFT in milliseconds
P50TTFTMs float64 `json:"p50_ttft_ms"` // 50th percentile TTFT
P95TTFTMs float64 `json:"p95_ttft_ms"` // 95th percentile TTFT
P99TTFTMs float64 `json:"p99_ttft_ms"` // 99th percentile TTFT
LastTTFTUpdate time.Time `json:"last_ttft_update"` // When TTFT was last updated
// Cache tracking fields
WindowCacheHits int64 `json:"window_cache_hits"` // Cache hits in current window
WindowCacheMisses int64 `json:"window_cache_misses"` // Cache misses in current window
CacheHitRate float64 `json:"cache_hit_rate"` // Cache hit rate (hits / total)
// Cost tracking fields (token-based)
WindowCostTokens int64 `json:"window_cost_tokens"` // Total tokens as cost proxy in current window
// contains filtered or unexported fields
}
ServiceStats tracks usage statistics for a service
func (*ServiceStats) GetCacheStats ¶
func (ss *ServiceStats) GetCacheStats() (hitRate float64, hits int64, misses int64)
GetCacheStats returns current cache statistics
func (*ServiceStats) GetCostMetrics ¶
func (ss *ServiceStats) GetCostMetrics() int64
GetCostMetrics returns cost-related metrics (token-based)
func (*ServiceStats) GetLatencyStats ¶
func (ss *ServiceStats) GetLatencyStats() (avg, p50, p95, p99 float64, sampleCount int)
GetLatencyStats returns current latency statistics
func (*ServiceStats) GetStats ¶
func (ss *ServiceStats) GetStats() ServiceStats
GetStats returns a copy of current statistics
func (*ServiceStats) GetTTFTStats ¶
func (ss *ServiceStats) GetTTFTStats() (avg, p50, p95, p99 float64, sampleCount int)
GetTTFTStats returns current TTFT statistics
func (*ServiceStats) GetTokenSpeedStats ¶
func (ss *ServiceStats) GetTokenSpeedStats() (avgSpeed float64, sampleCount int)
GetTokenSpeedStats returns current token speed statistics
func (*ServiceStats) GetWindowStats ¶
func (ss *ServiceStats) GetWindowStats() (requestCount int64, tokensConsumed int64)
GetWindowStats returns current window statistics
func (*ServiceStats) GetWindowTokenDetails ¶
func (ss *ServiceStats) GetWindowTokenDetails() (requestCount int64, inputTokens int64, outputTokens int64)
GetWindowTokenDetails returns current window input and output token details
func (*ServiceStats) IsWindowExpired ¶
func (ss *ServiceStats) IsWindowExpired() bool
IsWindowExpired checks if the current time window has expired
func (*ServiceStats) RecordCacheHit ¶
func (ss *ServiceStats) RecordCacheHit(isHit bool)
RecordCacheHit records a cache hit or miss event
func (*ServiceStats) RecordLatency ¶
func (ss *ServiceStats) RecordLatency(latencyMs int64, maxSamples int)
RecordLatency records a latency sample for this service
func (*ServiceStats) RecordTTFT ¶
func (ss *ServiceStats) RecordTTFT(ttftMs int64, maxSamples int)
RecordTTFT records a Time To First Token sample (in milliseconds)
func (*ServiceStats) RecordTokenSpeed ¶
func (ss *ServiceStats) RecordTokenSpeed(speedTps float64, maxSamples int)
RecordTokenSpeed records a token speed sample (tokens per second)
func (*ServiceStats) RecordUsage ¶
func (ss *ServiceStats) RecordUsage(inputTokens, outputTokens int)
RecordUsage records a usage event for this service
func (*ServiceStats) ResetWindow ¶
func (ss *ServiceStats) ResetWindow()
ResetWindow resets the time window statistics
type TacticType ¶
type TacticType int
TacticType represents different load balancing strategies
const (
TacticTokenBased TacticType // Rotate by token consumption
TacticRandom // Random selection with weighted probability
TacticLatencyBased // Route based on response latency
TacticSpeedBased // Route based on token generation speed
TacticAdaptive // Composite multi-dimensional routing
TacticCapacityBased // 6: NEW - capacity-based load balancing
)
func ParseTacticType ¶
func ParseTacticType(s string) TacticType
ParseTacticType parses string to TacticType
func (TacticType) MarshalJSON ¶
func (tt TacticType) MarshalJSON() ([]byte, error)
MarshalJSON implements json.Marshaler for TacticType
func (TacticType) String ¶
func (tt TacticType) String() string
String returns string representation of TacticType
func (*TacticType) UnmarshalJSON ¶
func (tt *TacticType) UnmarshalJSON(data []byte) error
UnmarshalJSON implements json.Unmarshaler for TacticType