loadbalance

package
v0.26041.1000-preview Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 11, 2026 License: MPL-2.0 Imports: 5 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type HealthMonitor

type HealthMonitor struct {
	// contains filtered or unexported fields
}

HealthMonitor manages health status for all services

func NewHealthMonitor

func NewHealthMonitor(config HealthMonitorConfig) *HealthMonitor

NewHealthMonitor creates a new health monitor with the given configuration

func (*HealthMonitor) GetAllHealth

func (hm *HealthMonitor) GetAllHealth() map[string]*ServiceHealth

GetAllHealth returns health status for all services

func (*HealthMonitor) GetHealth

func (hm *HealthMonitor) GetHealth(serviceID string) *ServiceHealth

GetHealth returns the health status for a service

func (*HealthMonitor) IsHealthy

func (hm *HealthMonitor) IsHealthy(serviceID string) bool

IsHealthy checks if a service is healthy (with time-based recovery and probing)

func (*HealthMonitor) RemoveHealth

func (hm *HealthMonitor) RemoveHealth(serviceID string)

RemoveHealth removes a service's health record

func (*HealthMonitor) ReportAuthError

func (hm *HealthMonitor) ReportAuthError(serviceID string, statusCode int)

ReportAuthError reports a 401/403 auth error for a service

func (*HealthMonitor) ReportError

func (hm *HealthMonitor) ReportError(serviceID string, err error)

ReportError reports a retryable error for a service

func (*HealthMonitor) ReportRateLimit

func (hm *HealthMonitor) ReportRateLimit(serviceID string)

ReportRateLimit reports a 429 rate limit error for a service

func (*HealthMonitor) ReportSuccess

func (hm *HealthMonitor) ReportSuccess(serviceID string)

ReportSuccess reports a successful request for a service

func (*HealthMonitor) ResetHealth

func (hm *HealthMonitor) ResetHealth(serviceID string)

ResetHealth manually resets a service's health to healthy

func (*HealthMonitor) SetProbeFunc

func (hm *HealthMonitor) SetProbeFunc(fn HealthProbeFunc)

SetProbeFunc sets the probe function for health checking during recovery

func (*HealthMonitor) UpdateConfig

func (hm *HealthMonitor) UpdateConfig(config HealthMonitorConfig)

UpdateConfig updates the health monitor configuration

type HealthMonitorConfig

type HealthMonitorConfig struct {
	// ConsecutiveErrorThreshold is the number of consecutive errors before marking unhealthy
	ConsecutiveErrorThreshold int `json:"consecutive_error_threshold" yaml:"consecutive_error_threshold"`
	// RecoveryTimeoutSeconds is the time in seconds before auto-recovery
	RecoveryTimeoutSeconds int `json:"recovery_timeout_seconds" yaml:"recovery_timeout_seconds"`
	// ProbeEnabled enables health check probing before marking service healthy
	ProbeEnabled bool `json:"probe_enabled" yaml:"probe_enabled"`
}

HealthMonitorConfig holds configuration for health monitoring

func DefaultHealthMonitorConfig

func DefaultHealthMonitorConfig() HealthMonitorConfig

DefaultHealthMonitorConfig returns default configuration

type HealthProbeFunc

type HealthProbeFunc func(serviceID string) bool

HealthProbeFunc is the function type for probing service health. It returns true if the service is healthy, false otherwise.

type HealthStatus

type HealthStatus int

HealthStatus represents the health state of a service

const (
	HealthHealthy   HealthStatus = iota // Service is healthy and available
	HealthUnhealthy                     // Service is unhealthy (rate limited, failing)
)

func (HealthStatus) String

func (h HealthStatus) String() string

String returns the string representation of HealthStatus

type Service

type Service struct {
	Provider      string       `yaml:"provider" json:"provider"`                                 // Provider name / uuid
	Model         string       `yaml:"model" json:"model"`                                       // Model name
	Weight        int          `yaml:"weight" json:"weight"`                                     // Weight for load balancing
	Active        bool         `yaml:"active" json:"active"`                                     // Whether this service is active
	TimeWindow    int          `yaml:"time_window" json:"time_window"`                           // Statistics time window in seconds
	ModelCapacity *int         `yaml:"model_capacity,omitempty" json:"model_capacity,omitempty"` // ModelCapacity overrides the provider's default_model_capacity for this specific model
	Stats         ServiceStats `yaml:"-" json:"-"`                                               // Service usage statistics (stored in SQLite, not in config)
}

Service represents a provider-model combination for load balancing

func (*Service) GetServiceID added in v0.260409.1540

func (s *Service) GetServiceID() ServiceID

GetServiceID returns the typed service identifier.

func (*Service) GetWindowStats

func (s *Service) GetWindowStats() (requestCount int64, tokensConsumed int64)

GetWindowStats returns current window statistics for this service

func (*Service) InitializeStats

func (s *Service) InitializeStats()

InitializeStats initializes the service statistics if they are empty

func (*Service) RecordUsage

func (s *Service) RecordUsage(inputTokens, outputTokens int)

RecordUsage records usage for this service

func (*Service) ServiceID

func (s *Service) ServiceID() string

ServiceID returns a unique string identifier for the service (provider:model). Deprecated: use GetServiceID() for the typed form.

type ServiceHealth

type ServiceHealth struct {
	ServiceID         string        // Unique service identifier (provider:model)
	Status            HealthStatus  // Current health status
	LastError         error         // Last error that caused unhealthy state
	LastErrorTime     time.Time     // When the error occurred
	ConsecutiveErrors int           // Count of consecutive errors
	RateLimited       bool          // True if last error was 429
	AuthError         bool          // True if last error was 401/403
	LastHealthCheck   time.Time     // Last time health was checked
	RecoveryTimeout   time.Duration // Time before auto-recovery
	// contains filtered or unexported fields
}

ServiceHealth tracks health information for a single service

type ServiceID added in v0.260409.1540

type ServiceID struct {
	ProviderUUID string `json:"provider_uuid"`
	Model        string `json:"model"`
}

ServiceID uniquely identifies a provider+model combination in load balancing.

func NewServiceID added in v0.260409.1540

func NewServiceID(providerUUID, model string) ServiceID

NewServiceID creates a ServiceID from provider UUID/name and model.

func (ServiceID) String added in v0.260409.1540

func (id ServiceID) String() string

String returns a stable string for use as map key and logging.

type ServiceStats

type ServiceStats struct {
	ServiceID            string    `json:"service_id"`             // Unique service identifier
	RequestCount         int64     `json:"request_count"`          // Total request count
	LastUsed             time.Time `json:"last_used"`              // Last usage timestamp
	WindowStart          time.Time `json:"window_start"`           // Current time window start
	WindowRequestCount   int64     `json:"window_request_count"`   // Requests in current window
	WindowTokensConsumed int64     `json:"window_tokens_consumed"` // Tokens consumed in current window (input + output)
	WindowInputTokens    int64     `json:"window_input_tokens"`    // Input tokens in current window
	WindowOutputTokens   int64     `json:"window_output_tokens"`   // Output tokens in current window
	TimeWindow           int       `json:"time_window"`            // Copy of service's time window

	// Latency tracking fields
	LatencySamples    []int64   `json:"-"`                   // Rolling window of latency samples (in ms)
	AvgLatencyMs      float64   `json:"avg_latency_ms"`      // Average latency in current window
	P50LatencyMs      float64   `json:"p50_latency_ms"`      // 50th percentile latency
	P95LatencyMs      float64   `json:"p95_latency_ms"`      // 95th percentile latency
	P99LatencyMs      float64   `json:"p99_latency_ms"`      // 99th percentile latency
	LastLatencyUpdate time.Time `json:"last_latency_update"` // When latency was last updated

	// Token speed tracking fields (tokens per second)
	SpeedSamples    []float64 `json:"-"`                 // Rolling window of token speed samples
	AvgTokenSpeed   float64   `json:"avg_token_speed"`   // Average tokens per second
	LastSpeedUpdate time.Time `json:"last_speed_update"` // When speed was last updated

	// TTFT (Time To First Token) tracking fields
	TTFTSamples    []int64   `json:"-"`                // Rolling window of TTFT samples (in ms)
	AvgTTFTMs      float64   `json:"avg_ttft_ms"`      // Average TTFT in milliseconds
	P50TTFTMs      float64   `json:"p50_ttft_ms"`      // 50th percentile TTFT
	P95TTFTMs      float64   `json:"p95_ttft_ms"`      // 95th percentile TTFT
	P99TTFTMs      float64   `json:"p99_ttft_ms"`      // 99th percentile TTFT
	LastTTFTUpdate time.Time `json:"last_ttft_update"` // When TTFT was last updated

	// Cache tracking fields
	WindowCacheHits   int64   `json:"window_cache_hits"`   // Cache hits in current window
	WindowCacheMisses int64   `json:"window_cache_misses"` // Cache misses in current window
	CacheHitRate      float64 `json:"cache_hit_rate"`      // Cache hit rate (hits / total)

	// Cost tracking fields (token-based)
	WindowCostTokens int64 `json:"window_cost_tokens"` // Total tokens as cost proxy in current window
	// contains filtered or unexported fields
}

ServiceStats tracks usage statistics for a service

func (*ServiceStats) GetCacheStats

func (ss *ServiceStats) GetCacheStats() (hitRate float64, hits int64, misses int64)

GetCacheStats returns current cache statistics

func (*ServiceStats) GetCostMetrics

func (ss *ServiceStats) GetCostMetrics() int64

GetCostMetrics returns cost-related metrics (token-based)

func (*ServiceStats) GetLatencyStats

func (ss *ServiceStats) GetLatencyStats() (avg, p50, p95, p99 float64, sampleCount int)

GetLatencyStats returns current latency statistics

func (*ServiceStats) GetStats

func (ss *ServiceStats) GetStats() ServiceStats

GetStats returns a copy of current statistics

func (*ServiceStats) GetTTFTStats

func (ss *ServiceStats) GetTTFTStats() (avg, p50, p95, p99 float64, sampleCount int)

GetTTFTStats returns current TTFT statistics

func (*ServiceStats) GetTokenSpeedStats

func (ss *ServiceStats) GetTokenSpeedStats() (avgSpeed float64, sampleCount int)

GetTokenSpeedStats returns current token speed statistics

func (*ServiceStats) GetWindowStats

func (ss *ServiceStats) GetWindowStats() (requestCount int64, tokensConsumed int64)

GetWindowStats returns current window statistics

func (*ServiceStats) GetWindowTokenDetails

func (ss *ServiceStats) GetWindowTokenDetails() (requestCount int64, inputTokens int64, outputTokens int64)

GetWindowTokenDetails returns current window input and output token details

func (*ServiceStats) IsWindowExpired

func (ss *ServiceStats) IsWindowExpired() bool

IsWindowExpired checks if the current time window has expired

func (*ServiceStats) RecordCacheHit

func (ss *ServiceStats) RecordCacheHit(isHit bool)

RecordCacheHit records a cache hit or miss event

func (*ServiceStats) RecordLatency

func (ss *ServiceStats) RecordLatency(latencyMs int64, maxSamples int)

RecordLatency records a latency sample for this service

func (*ServiceStats) RecordTTFT

func (ss *ServiceStats) RecordTTFT(ttftMs int64, maxSamples int)

RecordTTFT records a Time To First Token sample (in milliseconds)

func (*ServiceStats) RecordTokenSpeed

func (ss *ServiceStats) RecordTokenSpeed(speedTps float64, maxSamples int)

RecordTokenSpeed records a token speed sample (tokens per second)

func (*ServiceStats) RecordUsage

func (ss *ServiceStats) RecordUsage(inputTokens, outputTokens int)

RecordUsage records a usage event for this service

func (*ServiceStats) ResetWindow

func (ss *ServiceStats) ResetWindow()

ResetWindow resets the time window statistics

type TacticType

type TacticType int

TacticType represents different load balancing strategies

const (
	TacticTokenBased TacticType // Rotate by token consumption

	TacticRandom        // Random selection with weighted probability
	TacticLatencyBased  // Route based on response latency
	TacticSpeedBased    // Route based on token generation speed
	TacticAdaptive      // Composite multi-dimensional routing
	TacticCapacityBased // Capacity-based load balancing (value 6)
)

func ParseTacticType

func ParseTacticType(s string) TacticType

ParseTacticType parses string to TacticType

func (TacticType) MarshalJSON

func (tt TacticType) MarshalJSON() ([]byte, error)

MarshalJSON implements json.Marshaler for TacticType

func (TacticType) String

func (tt TacticType) String() string

String returns string representation of TacticType

func (*TacticType) UnmarshalJSON

func (tt *TacticType) UnmarshalJSON(data []byte) error

UnmarshalJSON implements json.Unmarshaler for TacticType

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL