loadbalance

package

Version: v0.26041.1000-preview (Latest) | Published: Apr 11, 2026 | License: MPL-2.0 | Imports: 5 | Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/tingly-dev/tingly-box

Links

Open Source Insights

Documentation ¶

Index ¶

type HealthMonitor
- func NewHealthMonitor(config HealthMonitorConfig) *HealthMonitor
type HealthMonitorConfig
- func DefaultHealthMonitorConfig() HealthMonitorConfig
type HealthProbeFunc
type HealthStatus
- func (h HealthStatus) String() string
type Service
type ServiceHealth
type ServiceID
- func NewServiceID(providerUUID, model string) ServiceID
- func (id ServiceID) String() string
type ServiceStats
type TacticType
- func ParseTacticType(s string) TacticType

Constants ¶

This section is empty.

Variables ¶

This section is empty.

Functions ¶

This section is empty.

Types ¶

type HealthMonitor ¶

type HealthMonitor struct {
	// contains filtered or unexported fields
}

HealthMonitor manages health status for all services

func NewHealthMonitor ¶

func NewHealthMonitor(config HealthMonitorConfig) *HealthMonitor

NewHealthMonitor creates a new health monitor with the given configuration

func (*HealthMonitor) GetAllHealth ¶

func (hm *HealthMonitor) GetAllHealth() map[string]*ServiceHealth

GetAllHealth returns health status for all services

func (*HealthMonitor) GetHealth ¶

func (hm *HealthMonitor) GetHealth(serviceID string) *ServiceHealth

GetHealth returns the health status for a service

func (*HealthMonitor) IsHealthy ¶

func (hm *HealthMonitor) IsHealthy(serviceID string) bool

IsHealthy reports whether a service is healthy, taking time-based recovery and probing into account.

func (*HealthMonitor) RemoveHealth ¶

func (hm *HealthMonitor) RemoveHealth(serviceID string)

RemoveHealth removes a service's health record

func (*HealthMonitor) ReportAuthError ¶

func (hm *HealthMonitor) ReportAuthError(serviceID string, statusCode int)

ReportAuthError reports a 401/403 auth error for a service

func (*HealthMonitor) ReportError ¶

func (hm *HealthMonitor) ReportError(serviceID string, err error)

ReportError reports a retryable error for a service

func (*HealthMonitor) ReportRateLimit ¶

func (hm *HealthMonitor) ReportRateLimit(serviceID string)

ReportRateLimit reports a 429 rate limit error for a service

func (*HealthMonitor) ReportSuccess ¶

func (hm *HealthMonitor) ReportSuccess(serviceID string)

ReportSuccess reports a successful request for a service

func (*HealthMonitor) ResetHealth ¶

func (hm *HealthMonitor) ResetHealth(serviceID string)

ResetHealth manually resets a service's health to healthy

func (*HealthMonitor) SetProbeFunc ¶

func (hm *HealthMonitor) SetProbeFunc(fn HealthProbeFunc)

SetProbeFunc sets the probe function for health checking during recovery

func (*HealthMonitor) UpdateConfig ¶

func (hm *HealthMonitor) UpdateConfig(config HealthMonitorConfig)

UpdateConfig updates the health monitor configuration

type HealthMonitorConfig ¶

type HealthMonitorConfig struct {
	// ConsecutiveErrorThreshold is the number of consecutive errors before marking unhealthy
	ConsecutiveErrorThreshold int `json:"consecutive_error_threshold" yaml:"consecutive_error_threshold"`
	// RecoveryTimeoutSeconds is the time in seconds before auto-recovery
	RecoveryTimeoutSeconds int `json:"recovery_timeout_seconds" yaml:"recovery_timeout_seconds"`
	// ProbeEnabled enables health check probing before marking service healthy
	ProbeEnabled bool `json:"probe_enabled" yaml:"probe_enabled"`
}

HealthMonitorConfig holds configuration for health monitoring

func DefaultHealthMonitorConfig ¶

func DefaultHealthMonitorConfig() HealthMonitorConfig

DefaultHealthMonitorConfig returns the default configuration.

type HealthProbeFunc ¶

type HealthProbeFunc func(serviceID string) bool

HealthProbeFunc is the function type for probing service health. It returns true if the service is healthy, false otherwise.

type HealthStatus ¶

type HealthStatus int

HealthStatus represents the health state of a service

const (
	HealthHealthy   HealthStatus = iota // Service is healthy and available
	HealthUnhealthy                     // Service is unhealthy (rate limited, failing)
)

func (HealthStatus) String ¶

func (h HealthStatus) String() string

String returns the string representation of HealthStatus

type Service ¶

type Service struct {
	Provider      string       `yaml:"provider" json:"provider"`                                 // Provider name / uuid
	Model         string       `yaml:"model" json:"model"`                                       // Model name
	Weight        int          `yaml:"weight" json:"weight"`                                     // Weight for load balancing
	Active        bool         `yaml:"active" json:"active"`                                     // Whether this service is active
	TimeWindow    int          `yaml:"time_window" json:"time_window"`                           // Statistics time window in seconds
	ModelCapacity *int         `yaml:"model_capacity,omitempty" json:"model_capacity,omitempty"` // ModelCapacity overrides the provider's default_model_capacity for this specific model
	Stats         ServiceStats `yaml:"-" json:"-"`                                               // Service usage statistics (stored in SQLite, not in config)
}

Service represents a provider-model combination for load balancing

func (*Service) GetServiceID ¶ added in v0.260409.1540

func (s *Service) GetServiceID() ServiceID

GetServiceID returns the typed service identifier.

func (*Service) GetWindowStats ¶

func (s *Service) GetWindowStats() (requestCount int64, tokensConsumed int64)

GetWindowStats returns current window statistics for this service

func (*Service) InitializeStats ¶

func (s *Service) InitializeStats()

InitializeStats initializes the service statistics if they are empty

func (*Service) RecordUsage ¶

func (s *Service) RecordUsage(inputTokens, outputTokens int)

RecordUsage records usage for this service

func (*Service) ServiceID ¶

func (s *Service) ServiceID() string

ServiceID returns a unique string identifier for the service (provider:model). Deprecated: use GetServiceID() for the typed form.

type ServiceHealth ¶

type ServiceHealth struct {
	ServiceID         string        // Unique service identifier (provider:model)
	Status            HealthStatus  // Current health status
	LastError         error         // Last error that caused unhealthy state
	LastErrorTime     time.Time     // When the error occurred
	ConsecutiveErrors int           // Count of consecutive errors
	RateLimited       bool          // True if last error was 429
	AuthError         bool          // True if last error was 401/403
	LastHealthCheck   time.Time     // Last time health was checked
	RecoveryTimeout   time.Duration // Time before auto-recovery
	// contains filtered or unexported fields
}

ServiceHealth tracks health information for a single service

type ServiceID ¶ added in v0.260409.1540

type ServiceID struct {
	ProviderUUID string `json:"provider_uuid"`
	Model        string `json:"model"`
}

ServiceID uniquely identifies a provider+model combination in load balancing.

func NewServiceID ¶ added in v0.260409.1540

func NewServiceID(providerUUID, model string) ServiceID

NewServiceID creates a ServiceID from provider UUID/name and model.

func (ServiceID) String ¶ added in v0.260409.1540

func (id ServiceID) String() string

String returns a stable string for use as map key and logging.

type ServiceStats ¶

type ServiceStats struct {
	ServiceID            string    `json:"service_id"`             // Unique service identifier
	RequestCount         int64     `json:"request_count"`          // Total request count
	LastUsed             time.Time `json:"last_used"`              // Last usage timestamp
	WindowStart          time.Time `json:"window_start"`           // Current time window start
	WindowRequestCount   int64     `json:"window_request_count"`   // Requests in current window
	WindowTokensConsumed int64     `json:"window_tokens_consumed"` // Tokens consumed in current window (input + output)
	WindowInputTokens    int64     `json:"window_input_tokens"`    // Input tokens in current window
	WindowOutputTokens   int64     `json:"window_output_tokens"`   // Output tokens in current window
	TimeWindow           int       `json:"time_window"`            // Copy of service's time window

	// Latency tracking fields
	LatencySamples    []int64   `json:"-"`                   // Rolling window of latency samples (in ms)
	AvgLatencyMs      float64   `json:"avg_latency_ms"`      // Average latency in current window
	P50LatencyMs      float64   `json:"p50_latency_ms"`      // 50th percentile latency
	P95LatencyMs      float64   `json:"p95_latency_ms"`      // 95th percentile latency
	P99LatencyMs      float64   `json:"p99_latency_ms"`      // 99th percentile latency
	LastLatencyUpdate time.Time `json:"last_latency_update"` // When latency was last updated

	// Token speed tracking fields (tokens per second)
	SpeedSamples    []float64 `json:"-"`                 // Rolling window of token speed samples
	AvgTokenSpeed   float64   `json:"avg_token_speed"`   // Average tokens per second
	LastSpeedUpdate time.Time `json:"last_speed_update"` // When speed was last updated

	// TTFT (Time To First Token) tracking fields
	TTFTSamples    []int64   `json:"-"`                // Rolling window of TTFT samples (in ms)
	AvgTTFTMs      float64   `json:"avg_ttft_ms"`      // Average TTFT in milliseconds
	P50TTFTMs      float64   `json:"p50_ttft_ms"`      // 50th percentile TTFT
	P95TTFTMs      float64   `json:"p95_ttft_ms"`      // 95th percentile TTFT
	P99TTFTMs      float64   `json:"p99_ttft_ms"`      // 99th percentile TTFT
	LastTTFTUpdate time.Time `json:"last_ttft_update"` // When TTFT was last updated

	// Cache tracking fields
	WindowCacheHits   int64   `json:"window_cache_hits"`   // Cache hits in current window
	WindowCacheMisses int64   `json:"window_cache_misses"` // Cache misses in current window
	CacheHitRate      float64 `json:"cache_hit_rate"`      // Cache hit rate (hits / total)

	// Cost tracking fields (token-based)
	WindowCostTokens int64 `json:"window_cost_tokens"` // Total tokens as cost proxy in current window
	// contains filtered or unexported fields
}

ServiceStats tracks usage statistics for a service

func (*ServiceStats) GetCacheStats ¶

func (ss *ServiceStats) GetCacheStats() (hitRate float64, hits int64, misses int64)

GetCacheStats returns current cache statistics

func (*ServiceStats) GetCostMetrics ¶

func (ss *ServiceStats) GetCostMetrics() int64

GetCostMetrics returns cost-related metrics (token-based)

func (*ServiceStats) GetLatencyStats ¶

func (ss *ServiceStats) GetLatencyStats() (avg, p50, p95, p99 float64, sampleCount int)

GetLatencyStats returns current latency statistics

func (*ServiceStats) GetStats ¶

func (ss *ServiceStats) GetStats() ServiceStats

GetStats returns a copy of current statistics

func (*ServiceStats) GetTTFTStats ¶

func (ss *ServiceStats) GetTTFTStats() (avg, p50, p95, p99 float64, sampleCount int)

GetTTFTStats returns current TTFT statistics

func (*ServiceStats) GetTokenSpeedStats ¶

func (ss *ServiceStats) GetTokenSpeedStats() (avgSpeed float64, sampleCount int)

GetTokenSpeedStats returns current token speed statistics

func (*ServiceStats) GetWindowStats ¶

func (ss *ServiceStats) GetWindowStats() (requestCount int64, tokensConsumed int64)

GetWindowStats returns current window statistics

func (*ServiceStats) GetWindowTokenDetails ¶

func (ss *ServiceStats) GetWindowTokenDetails() (requestCount int64, inputTokens int64, outputTokens int64)

GetWindowTokenDetails returns current window input and output token details

func (*ServiceStats) IsWindowExpired ¶

func (ss *ServiceStats) IsWindowExpired() bool

IsWindowExpired checks if the current time window has expired

func (*ServiceStats) RecordCacheHit ¶

func (ss *ServiceStats) RecordCacheHit(isHit bool)

RecordCacheHit records a cache hit or miss event

func (*ServiceStats) RecordLatency ¶

func (ss *ServiceStats) RecordLatency(latencyMs int64, maxSamples int)

RecordLatency records a latency sample for this service

func (*ServiceStats) RecordTTFT ¶

func (ss *ServiceStats) RecordTTFT(ttftMs int64, maxSamples int)

RecordTTFT records a Time To First Token sample (in milliseconds)

func (*ServiceStats) RecordTokenSpeed ¶

func (ss *ServiceStats) RecordTokenSpeed(speedTps float64, maxSamples int)

RecordTokenSpeed records a token speed sample (tokens per second)

func (*ServiceStats) RecordUsage ¶

func (ss *ServiceStats) RecordUsage(inputTokens, outputTokens int)

RecordUsage records a usage event for this service

func (*ServiceStats) ResetWindow ¶

func (ss *ServiceStats) ResetWindow()

ResetWindow resets the time window statistics

type TacticType ¶

type TacticType int

TacticType represents a load-balancing strategy.

const (
	TacticTokenBased TacticType // Rotate by token consumption

	TacticRandom        // Random selection with weighted probability
	TacticLatencyBased  // Route based on response latency
	TacticSpeedBased    // Route based on token generation speed
	TacticAdaptive      // Composite multi-dimensional routing
	TacticCapacityBased // 6: NEW - capacity-based load balancing
)

func ParseTacticType ¶

func ParseTacticType(s string) TacticType

ParseTacticType parses a string into a TacticType.

func (TacticType) MarshalJSON ¶

func (tt TacticType) MarshalJSON() ([]byte, error)

MarshalJSON implements json.Marshaler for TacticType

func (TacticType) String ¶

func (tt TacticType) String() string

String returns the string representation of TacticType.

func (*TacticType) UnmarshalJSON ¶

func (tt *TacticType) UnmarshalJSON(data []byte) error

UnmarshalJSON implements json.Unmarshaler for TacticType

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL