autoroute

package

v0.5.21 Latest Latest Go to latest Published: Jun 2, 2026 License: Apache-2.0 Imports: 17 Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/traylinx/switchAILocal

Links

Open Source Insights

Documentation ¶

Index ¶

Constants
Variables
func AutoDetectTier(provider string) string
func CalculateRQS(outcome RequestOutcome, weights RQSWeightConfig) float64
func ClassifyTier(probe ProbeResult) string
func ConservationMultiplier(tier string, complexity float64, cfg ConservationConfig) float64
func EstimateComplexity(content string) float64
func ExtractContentFromRawJSON(rawJSON []byte) string
func ExtractProvider(modelID string) string
func GetEffectiveTier(model, provider string, cfg Config) string
func IsAutoModel(model string) bool
func ParseAutoModelHint(model string) (base string, hint string)
func PreferenceBoost(modelID string, preferences []ModelPreference) float64
func TierBoost(tier string) float64
type AutoResolver
- func NewAutoResolver(cfg Config, workspaceDir string) *AutoResolver
- func (r *AutoResolver) GetCandidates() []CandidateInput
- func (r *AutoResolver) GetLabStatus() *LabStatus
- func (r *AutoResolver) GetRecentJournal(n int) []JournalEntry
- func (r *AutoResolver) RecordOutcome(reqID string, decision *RoutingDecision, provider string, ...)
- func (r *AutoResolver) Resolve(ctx context.Context, req *RoutingRequest) (*RoutingDecision, error)
- func (r *AutoResolver) SeedCandidates(candidates []CandidateInput)
- func (r *AutoResolver) Shutdown()
- func (r *AutoResolver) StartLab(ctx context.Context)
- func (r *AutoResolver) StartMonitor(ctx context.Context)
type AutoRoutingResult
- func ResolveAutoRequest(ctx context.Context, modelName string, content string, resolver *AutoResolver) *AutoRoutingResult
type CandidateInput
type Config
- func DefaultConfig() Config
- func (c *Config) Validate() error
type ConservationConfig
type DiscoveredModel
type DiscoveryConfig
type DiscoveryService
- func NewDiscoveryService(cfg DiscoveryConfig) *DiscoveryService
- func (s *DiscoveryService) DiscoverAll(ctx context.Context) map[string]ProbeResult
- func (s *DiscoveryService) GetCachedProbes() map[string]ProbeResult
- func (s *DiscoveryService) RegisterProber(p ProviderProber)
type ExperimentJournal
- func NewExperimentJournal(workspaceDir string, enabled bool) (*ExperimentJournal, error)
- func (j *ExperimentJournal) Close() error
- func (j *ExperimentJournal) GetRecent(n int) []JournalEntry
- func (j *ExperimentJournal) Record(entry JournalEntry)
type FallbackEntry
type IntentClassification
- func ClassifyIntent(content string, intentHint string) IntentClassification
type JournalEntry
type Lab
- func NewLab(cfg Config, scorer *ProviderScorer, journal *ExperimentJournal) *Lab
- func (l *Lab) GetStatus() LabStatus
- func (l *Lab) RecordOutcome(reqID string, intent string, complexity float64, prodDecision *RoutingDecision, ...)
- func (l *Lab) Start(ctx context.Context)
- func (l *Lab) Stop()
type LabConfig
type LabStatus
type ModelPreference
type ProbeResult
type ProviderHealthMonitor
- func NewProviderHealthMonitor(resolver *AutoResolver, interval time.Duration) *ProviderHealthMonitor
- func (m *ProviderHealthMonitor) RecordRequestOutcome(provider string, latency time.Duration, success bool, httpCode int, ...)
- func (m *ProviderHealthMonitor) RegisterInitialCandidates(candidates []CandidateInput)
- func (m *ProviderHealthMonitor) Start(ctx context.Context)
- func (m *ProviderHealthMonitor) Stop()
type ProviderHealthState
type ProviderProber
type ProviderScorer
- func NewProviderScorer(cfg Config) *ProviderScorer
- func (s *ProviderScorer) GetWeights() ScoringWeights
- func (s *ProviderScorer) ScoreAll(candidates []CandidateInput, complexity float64) []ScoredCandidate
- func (s *ProviderScorer) SetWeights(w ScoringWeights)
type ProviderTierConfig
type RQSWeightConfig
type RateLimitHeaderConfig
type RateLimitSnapshot
- func ParseRateLimitHeaders(provider string, headers http.Header) RateLimitSnapshot
type RateLimits
type RequestOutcome
type RoutingDecision
- func (d *RoutingDecision) String() string
type RoutingRequest
type ScoredCandidate
type ScoringWeights
type SubscriptionInfo

Constants ¶

View Source

const (
	AuthTypeOAuth   = "oauth"
	AuthTypeAPIKey  = "api-key"
	AuthTypeLocal   = "local"
	AuthTypeUnknown = "unknown"
)

Supported provider connection (authentication) types.

View Source

const (
	IntentCoding    = "coding"
	IntentReasoning = "reasoning"
	IntentCreative  = "creative"
	IntentFast      = "fast"
	IntentVision    = "vision"
	IntentSecure    = "secure"
	IntentGeneral   = "general"
)

Intent constants define the standard intent categories.

View Source

const (
	TierPremium  = "premium"
	TierStandard = "standard"
	TierFree     = "free"
	TierLocal    = "local"
	TierReserve  = "reserve"
)

Tier constants define the provider subscription levels.

View Source

const DefaultAltSuccessRQS = 0.85

DefaultAltSuccessRQS is the optimistic RQS estimate assigned when shadow selections route away from a failing production provider. It represents a conservative-but-hopeful guess that the alternative model would have succeeded (similar to prod success RQS without the 0.40 success penalty).

Variables ¶

View Source

var DefaultRQSWeights = RQSWeightConfig{
	Success:      0.40,
	Latency:      0.20,
	Efficiency:   0.20,
	Conservation: 0.20,
}

DefaultRQSWeights provides the standard baseline for evaluating routing quality.

View Source

var ErrAutoRoutingDisabled = errors.New("auto-routing is disabled")

ErrAutoRoutingDisabled is returned when auto-routing is not enabled in config.

View Source

var ErrNoAvailableProviders = errors.New("all providers are unavailable")

ErrNoAvailableProviders is returned when all candidate providers are unavailable.

View Source

var ErrResolutionTimeout = errors.New("auto-routing resolution exceeded timeout budget")

ErrResolutionTimeout is returned when the resolution exceeds the configured budget.

Functions ¶

func AutoDetectTier ¶

func AutoDetectTier(provider string) string

AutoDetectTier infers a provider's tier from its name when no explicit config exists.

func CalculateRQS ¶

func CalculateRQS(outcome RequestOutcome, weights RQSWeightConfig) float64

CalculateRQS computes the Routing Quality Score (0.0 to 1.0) for a single request outcome. Higher is always better. This is the single metric the Lab optimizes for.

func ClassifyTier ¶

func ClassifyTier(probe ProbeResult) string

ClassifyTier analyzes the probe result to determine the provider's subscription tier. Returns "free", "standard", or "premium".

func ConservationMultiplier ¶

func ConservationMultiplier(tier string, complexity float64, cfg ConservationConfig) float64

ConservationMultiplier adjusts the final score based on task complexity and provider tier. This prevents wasting premium tokens on trivial tasks.

func EstimateComplexity ¶

func EstimateComplexity(content string) float64

EstimateComplexity provides a fast heuristic complexity estimate from content length. Uses the len(content)/4 token approximation (~100ns, well within 5ms budget).

func ExtractContentFromRawJSON ¶

func ExtractContentFromRawJSON(rawJSON []byte) string

ExtractContentFromRawJSON returns a text sample from a raw JSON request body for complexity estimation. This is intentionally coarse — it does NOT parse the "content" field from JSON. Instead it returns the raw string (or its tail for large payloads). EstimateComplexity then uses len/4 as a token proxy.

NOTE: This over-estimates token count for JSON-heavy payloads (system prompts, tool definitions, etc.) but is acceptable because EstimateComplexity uses wide buckets (0.1/0.3/0.5/0.7/0.9), making the coarseness tolerable.

func ExtractProvider ¶

func ExtractProvider(modelID string) string

ExtractProvider extracts the provider portion from a "provider:model" string.

func GetEffectiveTier ¶

func GetEffectiveTier(model, provider string, cfg Config) string

GetEffectiveTier resolves the effective tier for a model. Model-level overrides (e.g., ollama model-tiers) take precedence over provider-level tiers.

func IsAutoModel ¶

func IsAutoModel(model string) bool

IsAutoModel returns true if the model string signals auto-routing (i.e., "auto", "", or "auto:*").

func ParseAutoModelHint ¶

func ParseAutoModelHint(model string) (base string, hint string)

ParseAutoModelHint extracts the intent hint from a model name like "auto:coding". Returns ("auto", "") for plain "auto", ("auto", "coding") for "auto:coding".

func PreferenceBoost ¶

func PreferenceBoost(modelID string, preferences []ModelPreference) float64

PreferenceBoost returns the additive preference boost for a specific model ID. The raw preference (0.0-1.0) is scaled by 0.20 to limit its influence to 20%.

func TierBoost ¶

func TierBoost(tier string) float64

TierBoost returns the additive score boost for a given tier.

Types ¶

type AutoResolver ¶

type AutoResolver struct {
	// contains filtered or unexported fields
}

AutoResolver orchestrates the full routing decision pipeline.

func NewAutoResolver ¶

func NewAutoResolver(cfg Config, workspaceDir string) *AutoResolver

NewAutoResolver creates a new resolver with the given configuration. workspaceDir is used for persistent storage (journal TSV files).

func (*AutoResolver) GetCandidates ¶

func (r *AutoResolver) GetCandidates() []CandidateInput

GetCandidates returns the current live state of the providers.

func (*AutoResolver) GetLabStatus ¶

func (r *AutoResolver) GetLabStatus() *LabStatus

GetLabStatus returns the current live telemetry from the autonomous experiment engine.

func (*AutoResolver) GetRecentJournal ¶

func (r *AutoResolver) GetRecentJournal(n int) []JournalEntry

GetRecentJournal returns the most recent routing decisions logged by the lab.

func (*AutoResolver) RecordOutcome ¶

func (r *AutoResolver) RecordOutcome(reqID string, decision *RoutingDecision, provider string, latency time.Duration, success bool, httpCode int, headers http.Header)

RecordOutcome allows the proxy handler to passively report request statistics and triggers the Lab optimization cycle.

func (*AutoResolver) Resolve ¶

func (r *AutoResolver) Resolve(ctx context.Context, req *RoutingRequest) (*RoutingDecision, error)

Resolve executes the full routing pipeline:

1. Validate config (enabled?)
2. Estimate complexity from content
3. Filter by intent (if applicable)
4. Score all candidates
5. Select winner + build fallback chain

The entire operation is budgeted to cfg.MaxResolution (default 5ms).

func (*AutoResolver) SeedCandidates ¶

func (r *AutoResolver) SeedCandidates(candidates []CandidateInput)

SeedCandidates allows the DiscoveryService to inject real-time provider data and registers them with the health monitor for initial state tracking.

func (*AutoResolver) Shutdown ¶

func (r *AutoResolver) Shutdown()

Shutdown gracefully stops the monitor and the lab, and closes the journal file.

func (*AutoResolver) StartLab ¶ added in v0.5.7

func (r *AutoResolver) StartLab(ctx context.Context)

StartLab begins the autonomous self-optimization loop if enabled in config.

func (*AutoResolver) StartMonitor ¶

func (r *AutoResolver) StartMonitor(ctx context.Context)

StartMonitor begins background health checks.

type AutoRoutingResult ¶

type AutoRoutingResult struct {
	// ResolvedModel is the model chosen by the auto-router (e.g., "geminicli:gemini-3.1-pro")
	ResolvedModel string

	// Providers is the ordered list of providers to try (winner first, then fallbacks)
	Providers []string

	// Intent is the classified or hinted intent (e.g., "coding")
	Intent string

	// Complexity is the estimated prompt complexity (0.0-1.0)
	Complexity float64

	// Decision is the full routing rationale, retained for Lab telemetry
	Decision *RoutingDecision

	// WasAutoRouted indicates this request was handled by auto-routing
	WasAutoRouted bool
}

AutoRoutingResult holds the outcome of an auto-routing resolution, consumable by the handler pipeline.

func ResolveAutoRequest ¶

func ResolveAutoRequest(ctx context.Context, modelName string, content string, resolver *AutoResolver) *AutoRoutingResult

ResolveAutoRequest runs the auto-routing pipeline for a given request. It takes the model name (which may contain an intent hint like "auto:coding"), the user content for complexity estimation, and the resolver instance.

If the resolver is nil or routing is disabled, it returns nil (caller should fall back to legacy logic).

type CandidateInput ¶

type CandidateInput struct {
	Model       string        // Full model ID (e.g., "geminicli:gemini-3.1-pro")
	Provider    string        // Provider name (e.g., "geminicli")
	Available   bool          // Is the provider reachable and has active credentials?
	QuotaHealth float64       // 0.0 (exhausted) to 1.0 (full), based on active/cooled credentials
	Latency     time.Duration // Average observed latency (0 = unknown)
	SuccessRate float64       // 0.0 to 1.0 historical success rate (-1 = unknown/cold start)
}

CandidateInput represents the raw health data for a single model candidate. These values are provided by external health monitors, registries, and stats trackers.

type Config ¶

type Config struct {
	// Enabled switches the auto-routing subsystem on. When it is false,
	// auto-routing is disabled (see ErrAutoRoutingDisabled).
	Enabled       bool                          `yaml:"enabled" json:"enabled"`
	// MaxResolution is the time budget for a single Resolve call (documented default: 5ms).
	// NOTE(review): the keys end in "ms" while the Go type is time.Duration;
	// confirm which unit the config loader expects for a bare number.
	MaxResolution time.Duration                 `yaml:"max-resolution-ms" json:"max_resolution_ms"`
	// Providers holds the per-provider tier configuration.
	// NOTE(review): presumably keyed by provider name; confirm.
	Providers     map[string]ProviderTierConfig `yaml:"providers" json:"providers"`
	// Preferences soft-steers the scoring towards specific models.
	Preferences   []ModelPreference             `yaml:"preferences" json:"preferences"`
	// Conservation dictates how aggressively premium tokens are hoarded.
	Conservation  ConservationConfig            `yaml:"conservation" json:"conservation"`
	// Discovery controls active and passive intelligence gathering.
	Discovery     DiscoveryConfig               `yaml:"discovery" json:"discovery"`
	// Weights sets the importance of the individual health metrics during scoring.
	Weights       ScoringWeights                `yaml:"weights" json:"weights"`
	// NOTE(review): IntentMatrix presumably maps an intent name to a list of
	// model identifiers; confirm against the resolver's intent filter.
	IntentMatrix  map[string][]string           `yaml:"intent-matrix" json:"intent_matrix"`
	// Lab configures the autonomous self-optimization engine.
	Lab           LabConfig                     `yaml:"lab" json:"lab"`
	// DisabledProviders lists provider names that should never be selected, even if
	// they score highest. Use when credits are exhausted or a provider is known-bad.
	// Example: [anthropic] skips all Anthropic models until the list is cleared.
	DisabledProviders []string `yaml:"disabled-providers" json:"disabled_providers"`
}

Config represents the master configuration for the Auto-Routing subsystem.

func DefaultConfig ¶

func DefaultConfig() Config

DefaultConfig returns the default safe configuration for Auto-Routing (opt-in).

func (*Config) Validate ¶

func (c *Config) Validate() error

Validate ensures the configuration is mathematically sound and safe to execute.

type ConservationConfig ¶

type ConservationConfig struct {
	// Enabled toggles premium-token conservation.
	Enabled               bool    `yaml:"enabled" json:"enabled"`
	// SimpleThreshold is a token count, per its config key.
	// NOTE(review): presumably prompts below this size count as "simple"; confirm.
	SimpleThreshold       int     `yaml:"simple-threshold-tokens" json:"simple_threshold_tokens"`
	PremiumConservationAt float64 `yaml:"premium-conservation-at" json:"premium_conservation_at"` // fraction in the range 0.0 to 1.0 (not 0-100)
}

ConservationConfig dictates how aggressively the router should hoard premium tokens.

type DiscoveredModel ¶

type DiscoveredModel struct {
	// ID identifies the discovered model.
	ID      string
	// NOTE(review): Context is presumably the model's context-window size in
	// tokens; confirm against the probers that populate it.
	Context int
}

DiscoveredModel represents a model found during probing.

type DiscoveryConfig ¶

type DiscoveryConfig struct {
	Enabled           bool          `yaml:"enabled" json:"enabled"`
	ProbeOnStartup    bool          `yaml:"probe-on-startup" json:"probe_on_startup"`
	ProbeInterval     time.Duration `yaml:"probe-interval" json:"probe_interval"`
	ProbeTimeout      time.Duration `yaml:"probe-timeout" json:"probe_timeout"`
	PassiveMonitoring bool          `yaml:"passive-monitoring" json:"passive_monitoring"`
	CacheTTL          time.Duration `yaml:"cache-ttl" json:"cache_ttl"`
}

DiscoveryConfig controls active and passive intelligence gathering.

type DiscoveryService ¶

type DiscoveryService struct {
	// contains filtered or unexported fields
}

DiscoveryService orchestrates parallel health and tier probing.

func NewDiscoveryService ¶

func NewDiscoveryService(cfg DiscoveryConfig) *DiscoveryService

NewDiscoveryService initializes the orchestrator with configured probers.

func (*DiscoveryService) DiscoverAll ¶

func (s *DiscoveryService) DiscoverAll(ctx context.Context) map[string]ProbeResult

DiscoverAll runs all registered probes in parallel.

func (*DiscoveryService) GetCachedProbes ¶

func (s *DiscoveryService) GetCachedProbes() map[string]ProbeResult

GetCachedProbes returns the latest discovery results.

func (*DiscoveryService) RegisterProber ¶

func (s *DiscoveryService) RegisterProber(p ProviderProber)

RegisterProber adds a provider-specific probe to the pipeline.

type ExperimentJournal ¶

type ExperimentJournal struct {
	// contains filtered or unexported fields
}

ExperimentJournal maintains a TSV audit trail of all routing decisions and shadow experiments, directly inspired by AutoResearch.

func NewExperimentJournal ¶

func NewExperimentJournal(workspaceDir string, enabled bool) (*ExperimentJournal, error)

NewExperimentJournal initializes an append-only TSV logger.

func (*ExperimentJournal) Close ¶

func (j *ExperimentJournal) Close() error

Close gracefully flushes and closes the journal file.

func (*ExperimentJournal) GetRecent ¶

func (j *ExperimentJournal) GetRecent(n int) []JournalEntry

GetRecent returns up to the last n entries from the in-memory ring buffer, ordered from newest to oldest.

func (*ExperimentJournal) Record ¶

func (j *ExperimentJournal) Record(entry JournalEntry)

Record appends a new entry to the TSV journal.

type FallbackEntry ¶

type FallbackEntry struct {
	Provider string `json:"provider"`
	Model    string `json:"model"`
	Tier     string `json:"tier"`
}

FallbackEntry represents a selected provider/model combination in the execution chain.

type IntentClassification ¶

type IntentClassification struct {
	Intent     string  // The detected intent category
	Confidence float64 // 0.0 - 1.0
	Method     string  // "hint", "heuristic", or "cortex"
}

IntentClassification represents a classified intent with confidence.

func ClassifyIntent ¶

func ClassifyIntent(content string, intentHint string) IntentClassification

ClassifyIntent detects the intent of a user message using fast heuristics. This is the "Reflex Tier" from the Cortex Router spec — pattern matching that runs in <1ms, no LLM required.

Returns IntentGeneral if no strong signal is detected.

type JournalEntry ¶

type JournalEntry struct {
	// Request identification and classification.
	Timestamp  time.Time
	RequestID  string
	Intent     string
	Complexity float64
	// Production decision: the model that served the request and how it performed.
	ProdModel   string
	ProdTier    string
	ProdLatency time.Duration
	ProdSuccess bool
	ProdRQS     float64
	// Shadow decision (recorded if it differs from the production decision).
	ShadowModel       string
	ShadowTier        string
	ShadowExpectedRQS float64 // Predicted RQS based on heuristic/history
	// Active weights at the time of the decision.
	// NOTE(review): these presumably mirror the ScoringWeights fields
	// (Availability, Quota, Latency, SuccessRate); confirm.
	WeightAvail   float64
	WeightQuota   float64
	WeightLatency float64
	WeightSuccess float64
}

JournalEntry represents a single row in the TSV log.

type Lab ¶

type Lab struct {
	// contains filtered or unexported fields
}

Lab orchestrates the autonomous self-optimization loop for routing weights. It directly implements the "fixed-budget, single-metric, keep-or-discard" loop inspired by AutoResearch.

func NewLab ¶

func NewLab(cfg Config, scorer *ProviderScorer, journal *ExperimentJournal) *Lab

NewLab initializes the self-optimizing research loop.

func (*Lab) GetStatus ¶

func (l *Lab) GetStatus() LabStatus

GetStatus returns the current live telemetry from the lab.

func (*Lab) RecordOutcome ¶

func (l *Lab) RecordOutcome(reqID string, intent string, complexity float64, prodDecision *RoutingDecision, prodOutcome RequestOutcome)

RecordOutcome is called after every request to log the real-world performance and compare it against the shadow prediction.

func (*Lab) Start ¶

func (l *Lab) Start(ctx context.Context)

Start begins the background adaptation loop.

func (*Lab) Stop ¶

func (l *Lab) Stop()

Stop halts the background loop and closes the journal. Safe to call multiple times.

type LabConfig ¶

type LabConfig struct {
	// Enabled gates the self-optimization loop; StartLab only begins the loop
	// if it is enabled in config.
	Enabled              bool          `yaml:"enabled" json:"enabled"`
	// NOTE(review): AdaptationInterval is presumably the period of the
	// background adaptation loop; confirm.
	AdaptationInterval   time.Duration `yaml:"adaptation-interval" json:"adaptation_interval"`
	// NOTE(review): MaxWeightDrift presumably bounds how far the Lab may move a
	// scoring weight; confirm the reference point and units.
	MaxWeightDrift       float64       `yaml:"max-weight-drift" json:"max_weight_drift"`
	MinObservationWindow int           `yaml:"min-observation-window" json:"min_observation_window"` // min requests before evaluating (default 10)
}

LabConfig controls the autonomous self-optimization engine (autoresearch plugin equivalent).

type LabStatus ¶

type LabStatus struct {
	Enabled             bool           `json:"enabled"`
	ActiveWeights       ScoringWeights `json:"active_weights"`
	ShadowWeights       ScoringWeights `json:"shadow_weights"`
	ActiveHypothesis    bool           `json:"active_hypothesis"`
	WindowReqCount      int            `json:"window_req_count"`
	WindowExploredCount int            `json:"window_explored_count"`
	AvgProdRQS          float64        `json:"avg_prod_rqs"`
	AvgShadowRQS        float64        `json:"avg_shadow_rqs"`
}

LabStatus provides a snapshot of the current optimization experiment state for the UI.

type ModelPreference ¶

type ModelPreference struct {
	// Model is the model ID the preference applies to.
	Model      string  `yaml:"model" json:"model"`
	Preference float64 `yaml:"preference" json:"preference"` // raw preference, 0.0 to 1.0; PreferenceBoost scales it by 0.20 into an additive boost
	// Reason is an optional free-text note (omitted from output when empty).
	Reason     string  `yaml:"reason,omitempty" json:"reason,omitempty"`
}

ModelPreference allows soft-steering of specific models.

type ProbeResult ¶

type ProbeResult struct {
	Provider         string
	Available        bool
	AuthType         string
	SubscriptionInfo *SubscriptionInfo
	Models           []DiscoveredModel
	RateLimits       *RateLimits
	Latency          time.Duration
	ProbeError       error
	ProbedAt         time.Time
}

ProbeResult holds the outcome of probing a specific provider.

type ProviderHealthMonitor ¶

type ProviderHealthMonitor struct {
	// contains filtered or unexported fields
}

ProviderHealthMonitor runs passively and actively to maintain provider health.

func NewProviderHealthMonitor ¶

func NewProviderHealthMonitor(resolver *AutoResolver, interval time.Duration) *ProviderHealthMonitor

NewProviderHealthMonitor initializes the monitoring subsystem.

func (*ProviderHealthMonitor) RecordRequestOutcome ¶

func (m *ProviderHealthMonitor) RecordRequestOutcome(provider string, latency time.Duration, success bool, httpCode int, headers http.Header)

RecordRequestOutcome is called directly by the handler pipeline on every request.

func (*ProviderHealthMonitor) RegisterInitialCandidates ¶

func (m *ProviderHealthMonitor) RegisterInitialCandidates(candidates []CandidateInput)

RegisterInitialCandidates bootstraps the health monitor with the candidates found by the boot-time discovery.

func (*ProviderHealthMonitor) Start ¶

func (m *ProviderHealthMonitor) Start(ctx context.Context)

Start begins the background active monitoring loop.

func (*ProviderHealthMonitor) Stop ¶

func (m *ProviderHealthMonitor) Stop()

Stop halts the background monitoring. Safe to call multiple times.

type ProviderHealthState ¶

type ProviderHealthState struct {
	LastProbe       time.Time
	Status          string        // "healthy", "degraded", "unavailable"
	Latency         time.Duration // Exponential moving average
	SuccessRate     float64       // Sliding window approximation
	QuotaRemaining  float64       // 0.0 - 1.0
	ConsecutiveFail int
	CoolingDown     bool
	CooldownUntil   time.Time
	// CooldownAttempts counts how many times this provider has tripped the
	// circuit. Used to compute exponential backoff for the next cooldown.
	// Resets to 0 after a successful request that follows a cooldown.
	CooldownAttempts int
}

ProviderHealthState tracks the live latency, availability, and quota health of a specific provider.

type ProviderProber ¶

type ProviderProber interface {
	// Name returns the provider name (e.g., "geminicli")
	Name() string
	// Probe executes the health/discovery check
	Probe(ctx context.Context) ProbeResult
}

ProviderProber defines the interface for probing a specific backend.

type ProviderScorer ¶

type ProviderScorer struct {
	// contains filtered or unexported fields
}

ProviderScorer implements the composite scoring algorithm.

func NewProviderScorer ¶

func NewProviderScorer(cfg Config) *ProviderScorer

NewProviderScorer creates a scorer with the given configuration.

func (*ProviderScorer) GetWeights ¶

func (s *ProviderScorer) GetWeights() ScoringWeights

GetWeights returns a snapshot of the current scoring weights (thread-safe).

func (*ProviderScorer) ScoreAll ¶

func (s *ProviderScorer) ScoreAll(candidates []CandidateInput, complexity float64) []ScoredCandidate

ScoreAll evaluates and sorts all candidates, returning scored results. The complexity parameter (0.0-1.0) drives the conservation multiplier.

func (*ProviderScorer) SetWeights ¶

func (s *ProviderScorer) SetWeights(w ScoringWeights)

SetWeights atomically replaces the scoring weights (thread-safe).

type ProviderTierConfig ¶

type ProviderTierConfig struct {
	Tier          string            `yaml:"tier" json:"tier"`
	MonthlyBudget float64           `yaml:"monthly-budget,omitempty" json:"monthly_budget,omitempty"`
	ModelTiers    map[string]string `yaml:"model-tiers,omitempty" json:"model_tiers,omitempty"`
}

ProviderTierConfig defines fixed constraints and tier allocations for a provider.

type RQSWeightConfig ¶

type RQSWeightConfig struct {
	Success      float64
	Latency      float64
	Efficiency   float64
	Conservation float64
}

RQSWeightConfig defines the importance of different factors in the Routing Quality Score (RQS). This is the master "val_bpb" metric.

type RateLimitHeaderConfig ¶

type RateLimitHeaderConfig struct {
	RequestLimit     string
	RequestRemaining string
	RequestReset     string
	TokenLimit       string
	TokenRemaining   string
	TokenReset       string
	RetryAfter       string
}

RateLimitHeaderConfig maps provider-specific HTTP header names to rate-limit semantics. Each provider uses slightly different header names, so we normalise them into a common struct.

type RateLimitSnapshot ¶

type RateLimitSnapshot struct {
	// Provider is the provider the snapshot was parsed for.
	Provider string

	// Request-level limits
	RequestLimit     int
	RequestRemaining int
	RequestReset     time.Time

	// Token-level limits
	TokenLimit     int
	TokenRemaining int
	TokenReset     time.Time

	// Retry-After, stored as a number of seconds.
	// NOTE(review): the HTTP Retry-After header may also carry an absolute
	// date; confirm how that form is converted before it is stored here.
	RetryAfterSec int

	// Derived health metric (0.0 = exhausted, 1.0 = full)
	QuotaHealth float64

	// Whether any rate-limit headers were detected at all
	Detected bool
}

RateLimitSnapshot is the normalised quota state extracted from a single HTTP response.

func ParseRateLimitHeaders ¶

func ParseRateLimitHeaders(provider string, headers http.Header) RateLimitSnapshot

ParseRateLimitHeaders extracts rate-limit quota data from an HTTP response according to the provider's known header scheme. If no relevant headers are found, Detected will be false.

type RateLimits ¶

type RateLimits struct {
	RequestsPerMinute int
	TokensPerMinute   int
}

RateLimits holds quota limits parsed from HTTP headers.

type RequestOutcome ¶

type RequestOutcome struct {
	Timestamp           time.Time
	Model               string
	Provider            string
	Tier                string
	Latency             time.Duration
	Success             bool
	StatusCode          int
	EstimatedComplexity float64
	TokensUsed          int // If available
}

RequestOutcome represents the real-world result of a routing decision.

type RoutingDecision ¶

type RoutingDecision struct {
	// SelectedModel is the primary model chosen for the request
	SelectedModel string `json:"selected_model"`

	// FallbackChain is an ordered list of models to try if the primary fails
	FallbackChain []FallbackEntry `json:"fallback_chain"`

	// Intent is the hinted or classified intent (e.g., "coding", "creative")
	Intent string `json:"intent"`

	// EstimatedComplexity of the prompt (0.0 to 1.0)
	EstimatedComplexity float64 `json:"estimated_complexity"`

	// Candidates evaluated (useful for debugging/dashboard)
	Candidates []ScoredCandidate `json:"candidates,omitempty"`

	// OriginalInputs preserves the raw CandidateInput values at decision time,
	// enabling the Lab to replay exact conditions for shadow scoring (H1 fix).
	// It is excluded from JSON output.
	OriginalInputs []CandidateInput `json:"-"`

	// ResolutionLatency tracks how long the scoring process took.
	// NOTE(review): the JSON key ends in "_ms", but a plain time.Duration is
	// encoded by encoding/json as an integer count of nanoseconds; confirm
	// whether custom marshaling exists or consumers expect nanoseconds.
	ResolutionLatency time.Duration `json:"resolution_latency_ms"`
}

RoutingDecision represents the final selected strategy determined by the AutoResolver.

func (*RoutingDecision) String ¶

func (d *RoutingDecision) String() string

String returns a human-readable summary of the routing decision.

type RoutingRequest ¶

type RoutingRequest struct {
	// Content is the user's message text (used for complexity estimation)
	Content string

	// IntentHint is an explicit intent from the model name (e.g., "auto:coding" → "coding")
	IntentHint string

	// AvailableModels is the list of currently registered candidates. Each
	// entry is a full CandidateInput (model ID plus its raw health data),
	// not just a model ID.
	AvailableModels []CandidateInput
}

RoutingRequest encapsulates everything the resolver needs to make a decision.

type ScoredCandidate ¶

type ScoredCandidate struct {
	// Model is the canonical internal model identifier (e.g., "geminicli:gemini-3.1-pro")
	Model string `json:"model"`

	// Provider is the extracted provider name (e.g., "geminicli")
	Provider string `json:"provider"`

	// Score breakdown. TierBoost and PreferenceBoost are additive boosts;
	// ConservationMult is the multiplier derived from task complexity and tier.
	// NOTE(review): the exact formula combining these into FinalScore is not
	// shown here; confirm against ProviderScorer.ScoreAll.
	FinalScore       float64 `json:"final_score"`
	BaseScore        float64 `json:"base_score"`
	TierBoost        float64 `json:"tier_boost"`
	PreferenceBoost  float64 `json:"preference_boost"`
	ConservationMult float64 `json:"conservation_multiplier"`

	// EffectiveTier is the final assigned tier (e.g., "premium", "free") after resolving overrides
	EffectiveTier string `json:"effective_tier"`

	// Available tracks if the health monitor considers this candidate routable
	Available bool `json:"available"`

	// EstimatedLatency via health monitor EMAs.
	// NOTE(review): the JSON key ends in "_ms", but a plain time.Duration is
	// encoded by encoding/json as an integer count of nanoseconds; confirm
	// the unit consumers expect.
	EstimatedLatency time.Duration `json:"estimated_latency_ms"`
}

ScoredCandidate represents a model that has been evaluated and scored by the ProviderScorer.

type ScoringWeights ¶

type ScoringWeights struct {
	Availability float64 `yaml:"availability" json:"availability"`
	Quota        float64 `yaml:"quota" json:"quota"`
	Latency      float64 `yaml:"latency" json:"latency"`
	SuccessRate  float64 `yaml:"success-rate" json:"success_rate"`
}

ScoringWeights represent the importance of different health metrics.

type SubscriptionInfo ¶

type SubscriptionInfo struct {
	Tier           string // "free", "standard", "premium", "unknown"
	QuotaTotal     int64  // -1 = unlimited
	QuotaRemaining int64
	ResetTime      time.Time
	Source         string // "api-header", "cli-command", "inferred", "config-override"
}

SubscriptionInfo holds tier details detected from probes.

Source Files ¶

View all Source files

Directories ¶

Path	Synopsis
probes

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL