Documentation
¶
Index ¶
- Constants
- Variables
- func AutoDetectTier(provider string) string
- func CalculateRQS(outcome RequestOutcome, weights RQSWeightConfig) float64
- func ClassifyTier(probe ProbeResult) string
- func ConservationMultiplier(tier string, complexity float64, cfg ConservationConfig) float64
- func EstimateComplexity(content string) float64
- func ExtractContentFromRawJSON(rawJSON []byte) string
- func ExtractProvider(modelID string) string
- func GetEffectiveTier(model, provider string, cfg Config) string
- func IsAutoModel(model string) bool
- func ParseAutoModelHint(model string) (base string, hint string)
- func PreferenceBoost(modelID string, preferences []ModelPreference) float64
- func TierBoost(tier string) float64
- type AutoResolver
- func (r *AutoResolver) GetCandidates() []CandidateInput
- func (r *AutoResolver) GetLabStatus() *LabStatus
- func (r *AutoResolver) GetRecentJournal(n int) []JournalEntry
- func (r *AutoResolver) RecordOutcome(reqID string, decision *RoutingDecision, provider string, ...)
- func (r *AutoResolver) Resolve(ctx context.Context, req *RoutingRequest) (*RoutingDecision, error)
- func (r *AutoResolver) SeedCandidates(candidates []CandidateInput)
- func (r *AutoResolver) Shutdown()
- func (r *AutoResolver) StartLab(ctx context.Context)
- func (r *AutoResolver) StartMonitor(ctx context.Context)
- type AutoRoutingResult
- type CandidateInput
- type Config
- type ConservationConfig
- type DiscoveredModel
- type DiscoveryConfig
- type DiscoveryService
- type ExperimentJournal
- type FallbackEntry
- type IntentClassification
- type JournalEntry
- type Lab
- type LabConfig
- type LabStatus
- type ModelPreference
- type ProbeResult
- type ProviderHealthMonitor
- func (m *ProviderHealthMonitor) RecordRequestOutcome(provider string, latency time.Duration, success bool, httpCode int, ...)
- func (m *ProviderHealthMonitor) RegisterInitialCandidates(candidates []CandidateInput)
- func (m *ProviderHealthMonitor) Start(ctx context.Context)
- func (m *ProviderHealthMonitor) Stop()
- type ProviderHealthState
- type ProviderProber
- type ProviderScorer
- type ProviderTierConfig
- type RQSWeightConfig
- type RateLimitHeaderConfig
- type RateLimitSnapshot
- type RateLimits
- type RequestOutcome
- type RoutingDecision
- type RoutingRequest
- type ScoredCandidate
- type ScoringWeights
- type SubscriptionInfo
Constants ¶
const ( AuthTypeOAuth = "oauth" AuthTypeAPIKey = "api-key" AuthTypeLocal = "local" AuthTypeUnknown = "unknown" )
AuthType constants enumerate the supported provider authentication (connection) types.
const ( IntentCoding = "coding" IntentReasoning = "reasoning" IntentCreative = "creative" IntentFast = "fast" IntentVision = "vision" IntentSecure = "secure" IntentGeneral = "general" )
Intent constants define the standard intent categories.
const ( TierPremium = "premium" TierStandard = "standard" TierFree = "free" TierLocal = "local" TierReserve = "reserve" )
Tier constants define the provider subscription levels.
const DefaultAltSuccessRQS = 0.85
DefaultAltSuccessRQS is the optimistic RQS estimate assigned when shadow selections route away from a failing production provider. It represents a conservative-but-hopeful guess that the alternative model would have succeeded (similar to prod success RQS without the 0.40 success penalty).
Variables ¶
var DefaultRQSWeights = RQSWeightConfig{
Success: 0.40,
Latency: 0.20,
Efficiency: 0.20,
Conservation: 0.20,
}
DefaultRQSWeights provides the standard baseline for evaluating routing quality.
var ErrAutoRoutingDisabled = errors.New("auto-routing is disabled")
ErrAutoRoutingDisabled is returned when auto-routing is not enabled in config.
var ErrNoAvailableProviders = errors.New("all providers are unavailable")
ErrNoAvailableProviders is returned when all candidate providers are unavailable.
var ErrResolutionTimeout = errors.New("auto-routing resolution exceeded timeout budget")
ErrResolutionTimeout is returned when the resolution exceeds the configured budget.
Functions ¶
func AutoDetectTier ¶
AutoDetectTier infers a provider's tier from its name when no explicit config exists.
func CalculateRQS ¶
func CalculateRQS(outcome RequestOutcome, weights RQSWeightConfig) float64
CalculateRQS computes the Routing Quality Score (0.0 to 1.0) for a single request outcome. Higher is always better. This is the single metric the Lab optimizes for.
func ClassifyTier ¶
func ClassifyTier(probe ProbeResult) string
ClassifyTier analyzes the probe result to determine the provider's subscription tier. Returns "free", "standard", or "premium".
func ConservationMultiplier ¶
func ConservationMultiplier(tier string, complexity float64, cfg ConservationConfig) float64
ConservationMultiplier adjusts the final score based on task complexity and provider tier. This prevents wasting premium tokens on trivial tasks.
func EstimateComplexity ¶
EstimateComplexity provides a fast heuristic complexity estimate from content length. Uses the len(content)/4 token approximation (~100ns, well within 5ms budget).
func ExtractContentFromRawJSON ¶
ExtractContentFromRawJSON returns a text sample from a raw JSON request body for complexity estimation. This is intentionally coarse — it does NOT parse the "content" field from JSON. Instead it returns the raw string (or its tail for large payloads). EstimateComplexity then uses len/4 as a token proxy.
NOTE: This over-estimates token count for JSON-heavy payloads (system prompts, tool definitions, etc.) but is acceptable because EstimateComplexity uses wide buckets (0.1/0.3/0.5/0.7/0.9), making the coarseness tolerable.
func ExtractProvider ¶
ExtractProvider extracts the provider portion from a "provider:model" string.
func GetEffectiveTier ¶
GetEffectiveTier resolves the effective tier for a model. Model-level overrides (e.g., ollama model-tiers) take precedence over provider-level tiers.
func IsAutoModel ¶
IsAutoModel returns true if the model string signals auto-routing (i.e., "auto", "", or "auto:*").
func ParseAutoModelHint ¶
ParseAutoModelHint extracts the intent hint from a model name like "auto:coding". Returns ("auto", "") for plain "auto", ("auto", "coding") for "auto:coding".
func PreferenceBoost ¶
func PreferenceBoost(modelID string, preferences []ModelPreference) float64
PreferenceBoost returns the additive preference boost for a specific model ID. The raw preference (0.0-1.0) is scaled by 0.20 to limit its influence to 20%.
Types ¶
type AutoResolver ¶
type AutoResolver struct {
// contains filtered or unexported fields
}
AutoResolver orchestrates the full routing decision pipeline.
func NewAutoResolver ¶
func NewAutoResolver(cfg Config, workspaceDir string) *AutoResolver
NewAutoResolver creates a new resolver with the given configuration. workspaceDir is used for persistent storage (journal TSV files).
func (*AutoResolver) GetCandidates ¶
func (r *AutoResolver) GetCandidates() []CandidateInput
GetCandidates returns the current live state of the providers.
func (*AutoResolver) GetLabStatus ¶
func (r *AutoResolver) GetLabStatus() *LabStatus
GetLabStatus returns the current live telemetry from the autonomous experiment engine.
func (*AutoResolver) GetRecentJournal ¶
func (r *AutoResolver) GetRecentJournal(n int) []JournalEntry
GetRecentJournal returns the most recent routing decisions logged by the lab.
func (*AutoResolver) RecordOutcome ¶
func (r *AutoResolver) RecordOutcome(reqID string, decision *RoutingDecision, provider string, latency time.Duration, success bool, httpCode int, headers http.Header)
RecordOutcome allows the proxy handler to passively report request statistics and triggers the Lab optimization cycle.
func (*AutoResolver) Resolve ¶
func (r *AutoResolver) Resolve(ctx context.Context, req *RoutingRequest) (*RoutingDecision, error)
Resolve executes the full routing pipeline:
- Validate config (enabled?)
- Estimate complexity from content
- Filter by intent (if applicable)
- Score all candidates
- Select winner + build fallback chain
The entire operation is budgeted to cfg.MaxResolution (default 5ms).
func (*AutoResolver) SeedCandidates ¶
func (r *AutoResolver) SeedCandidates(candidates []CandidateInput)
SeedCandidates allows the DiscoveryService to inject real-time provider data, and registers those candidates with the health monitor for initial state tracking.
func (*AutoResolver) Shutdown ¶
func (r *AutoResolver) Shutdown()
Shutdown gracefully stops the monitor, lab, and closes the journal file.
func (*AutoResolver) StartLab ¶ added in v0.5.7
func (r *AutoResolver) StartLab(ctx context.Context)
StartLab begins the autonomous self-optimization loop if enabled in config.
func (*AutoResolver) StartMonitor ¶
func (r *AutoResolver) StartMonitor(ctx context.Context)
StartMonitor begins background health checks.
type AutoRoutingResult ¶
type AutoRoutingResult struct {
// ResolvedModel is the model chosen by the auto-router (e.g., "geminicli:gemini-3.1-pro")
ResolvedModel string
// Providers is the ordered list of providers to try (winner first, then fallbacks)
Providers []string
// Intent is the classified or hinted intent (e.g., "coding")
Intent string
// Complexity is the estimated prompt complexity (0.0-1.0)
Complexity float64
// Decision is the full routing rationale, retained for Lab telemetry
Decision *RoutingDecision
// WasAutoRouted indicates this request was handled by auto-routing
WasAutoRouted bool
}
AutoRoutingResult holds the outcome of an auto-routing resolution, consumable by the handler pipeline.
func ResolveAutoRequest ¶
func ResolveAutoRequest(ctx context.Context, modelName string, content string, resolver *AutoResolver) *AutoRoutingResult
ResolveAutoRequest runs the auto-routing pipeline for a given request. It takes the model name (which may contain an intent hint like "auto:coding"), the user content for complexity estimation, and the resolver instance.
If the resolver is nil or routing is disabled, it returns nil (caller should fall back to legacy logic).
type CandidateInput ¶
type CandidateInput struct {
Model string // Full model ID (e.g., "geminicli:gemini-3.1-pro")
Provider string // Provider name (e.g., "geminicli")
Available bool // Is the provider reachable and has active credentials?
QuotaHealth float64 // 0.0 (exhausted) to 1.0 (full), based on active/cooled credentials
Latency time.Duration // Average observed latency (0 = unknown)
SuccessRate float64 // 0.0 to 1.0 historical success rate (-1 = unknown/cold start)
}
CandidateInput represents the raw health data for a single model candidate. These values are provided by external health monitors, registries, and stats trackers.
type Config ¶
type Config struct {
Enabled bool `yaml:"enabled" json:"enabled"`
MaxResolution time.Duration `yaml:"max-resolution-ms" json:"max_resolution_ms"`
Providers map[string]ProviderTierConfig `yaml:"providers" json:"providers"`
Preferences []ModelPreference `yaml:"preferences" json:"preferences"`
Conservation ConservationConfig `yaml:"conservation" json:"conservation"`
Discovery DiscoveryConfig `yaml:"discovery" json:"discovery"`
Weights ScoringWeights `yaml:"weights" json:"weights"`
IntentMatrix map[string][]string `yaml:"intent-matrix" json:"intent_matrix"`
Lab LabConfig `yaml:"lab" json:"lab"`
// DisabledProviders lists provider names that should never be selected, even if
// they score highest. Use when credits are exhausted or a provider is known-bad.
// Example: [anthropic] skips all Anthropic models until the list is cleared.
DisabledProviders []string `yaml:"disabled-providers" json:"disabled_providers"`
}
Config represents the master configuration for the Auto-Routing subsystem.
func DefaultConfig ¶
func DefaultConfig() Config
DefaultConfig returns the default safe configuration for Auto-Routing (opt-in).
type ConservationConfig ¶
type ConservationConfig struct {
Enabled bool `yaml:"enabled" json:"enabled"`
SimpleThreshold int `yaml:"simple-threshold-tokens" json:"simple_threshold_tokens"`
PremiumConservationAt float64 `yaml:"premium-conservation-at" json:"premium_conservation_at"` // fraction in the range 0.0 to 1.0 (not 0-100)
}
ConservationConfig dictates how aggressively the router should hoard premium tokens.
type DiscoveredModel ¶
DiscoveredModel represents a model found during probing.
type DiscoveryConfig ¶
type DiscoveryConfig struct {
Enabled bool `yaml:"enabled" json:"enabled"`
ProbeOnStartup bool `yaml:"probe-on-startup" json:"probe_on_startup"`
ProbeInterval time.Duration `yaml:"probe-interval" json:"probe_interval"`
ProbeTimeout time.Duration `yaml:"probe-timeout" json:"probe_timeout"`
PassiveMonitoring bool `yaml:"passive-monitoring" json:"passive_monitoring"`
CacheTTL time.Duration `yaml:"cache-ttl" json:"cache_ttl"`
}
DiscoveryConfig controls active and passive intelligence gathering.
type DiscoveryService ¶
type DiscoveryService struct {
// contains filtered or unexported fields
}
DiscoveryService orchestrates parallel health and tier probing.
func NewDiscoveryService ¶
func NewDiscoveryService(cfg DiscoveryConfig) *DiscoveryService
NewDiscoveryService initializes the orchestrator with configured probers.
func (*DiscoveryService) DiscoverAll ¶
func (s *DiscoveryService) DiscoverAll(ctx context.Context) map[string]ProbeResult
DiscoverAll runs all registered probes in parallel.
func (*DiscoveryService) GetCachedProbes ¶
func (s *DiscoveryService) GetCachedProbes() map[string]ProbeResult
GetCachedProbes returns the latest discovery results.
func (*DiscoveryService) RegisterProber ¶
func (s *DiscoveryService) RegisterProber(p ProviderProber)
RegisterProber adds a provider-specific probe to the pipeline.
type ExperimentJournal ¶
type ExperimentJournal struct {
// contains filtered or unexported fields
}
ExperimentJournal maintains a TSV audit trail of all routing decisions and shadow experiments, directly inspired by AutoResearch.
func NewExperimentJournal ¶
func NewExperimentJournal(workspaceDir string, enabled bool) (*ExperimentJournal, error)
NewExperimentJournal initializes an append-only TSV logger.
func (*ExperimentJournal) Close ¶
func (j *ExperimentJournal) Close() error
Close gracefully flushes and closes the journal file.
func (*ExperimentJournal) GetRecent ¶
func (j *ExperimentJournal) GetRecent(n int) []JournalEntry
GetRecent returns up to the last n entries from the in-memory ring buffer, ordered from newest to oldest.
func (*ExperimentJournal) Record ¶
func (j *ExperimentJournal) Record(entry JournalEntry)
Record appends a new entry to the TSV journal.
type FallbackEntry ¶
type FallbackEntry struct {
Provider string `json:"provider"`
Model string `json:"model"`
Tier string `json:"tier"`
}
FallbackEntry represents a selected provider/model combination in the execution chain.
type IntentClassification ¶
type IntentClassification struct {
Intent string // The detected intent category
Confidence float64 // 0.0 - 1.0
Method string // "hint", "heuristic", or "cortex"
}
IntentClassification represents a classified intent with confidence.
func ClassifyIntent ¶
func ClassifyIntent(content string, intentHint string) IntentClassification
ClassifyIntent detects the intent of a user message using fast heuristics. This is the "Reflex Tier" from the Cortex Router spec — pattern matching that runs in <1ms, no LLM required.
Returns IntentGeneral if no strong signal is detected.
type JournalEntry ¶
type JournalEntry struct {
Timestamp time.Time
RequestID string
Intent string
Complexity float64
// Production Decision
ProdModel string
ProdTier string
ProdLatency time.Duration
ProdSuccess bool
ProdRQS float64
// Shadow Decision (if different from Prod)
ShadowModel string
ShadowTier string
ShadowExpectedRQS float64 // Predicted RQS based on heuristic/history
// Active Weights
WeightAvail float64
WeightQuota float64
WeightLatency float64
WeightSuccess float64
}
JournalEntry represents a single row in the TSV log.
type Lab ¶
type Lab struct {
// contains filtered or unexported fields
}
Lab orchestrates the autonomous self-optimization loop for routing weights. It directly implements the "fixed-budget, single-metric, keep-or-discard" loop inspired by AutoResearch.
func NewLab ¶
func NewLab(cfg Config, scorer *ProviderScorer, journal *ExperimentJournal) *Lab
NewLab initializes the self-optimizing research loop.
func (*Lab) RecordOutcome ¶
func (l *Lab) RecordOutcome(reqID string, intent string, complexity float64, prodDecision *RoutingDecision, prodOutcome RequestOutcome)
RecordOutcome is called after every request to log the real-world performance and compare it against the shadow prediction.
type LabConfig ¶
type LabConfig struct {
Enabled bool `yaml:"enabled" json:"enabled"`
AdaptationInterval time.Duration `yaml:"adaptation-interval" json:"adaptation_interval"`
MaxWeightDrift float64 `yaml:"max-weight-drift" json:"max_weight_drift"`
MinObservationWindow int `yaml:"min-observation-window" json:"min_observation_window"` // min requests before evaluating (default 10)
}
LabConfig controls the autonomous self-optimization engine (autoresearch plugin equivalent).
type LabStatus ¶
type LabStatus struct {
Enabled bool `json:"enabled"`
ActiveWeights ScoringWeights `json:"active_weights"`
ShadowWeights ScoringWeights `json:"shadow_weights"`
ActiveHypothesis bool `json:"active_hypothesis"`
WindowReqCount int `json:"window_req_count"`
WindowExploredCount int `json:"window_explored_count"`
AvgProdRQS float64 `json:"avg_prod_rqs"`
AvgShadowRQS float64 `json:"avg_shadow_rqs"`
}
LabStatus provides a snapshot of the current optimization experiment state for the UI.
type ModelPreference ¶
type ModelPreference struct {
Model string `yaml:"model" json:"model"`
Preference float64 `yaml:"preference" json:"preference"` // 0.0 to 1.0 raw preference; PreferenceBoost scales it by 0.20 into an additive boost
Reason string `yaml:"reason,omitempty" json:"reason,omitempty"`
}
ModelPreference allows soft-steering of specific models.
type ProbeResult ¶
type ProbeResult struct {
Provider string
Available bool
AuthType string
SubscriptionInfo *SubscriptionInfo
Models []DiscoveredModel
RateLimits *RateLimits
Latency time.Duration
ProbeError error
ProbedAt time.Time
}
ProbeResult holds the outcome of probing a specific provider.
type ProviderHealthMonitor ¶
type ProviderHealthMonitor struct {
// contains filtered or unexported fields
}
ProviderHealthMonitor runs passively and actively to maintain provider health.
func NewProviderHealthMonitor ¶
func NewProviderHealthMonitor(resolver *AutoResolver, interval time.Duration) *ProviderHealthMonitor
NewProviderHealthMonitor initializes the monitoring subsystem.
func (*ProviderHealthMonitor) RecordRequestOutcome ¶
func (m *ProviderHealthMonitor) RecordRequestOutcome(provider string, latency time.Duration, success bool, httpCode int, headers http.Header)
RecordRequestOutcome is called by the handler pipeline on every request to report that request's latency, success flag, HTTP status code, and response headers.
func (*ProviderHealthMonitor) RegisterInitialCandidates ¶
func (m *ProviderHealthMonitor) RegisterInitialCandidates(candidates []CandidateInput)
RegisterInitialCandidates bootstraps the health monitor with the boot-time discovery results.
func (*ProviderHealthMonitor) Start ¶
func (m *ProviderHealthMonitor) Start(ctx context.Context)
Start begins the background active monitoring loop.
func (*ProviderHealthMonitor) Stop ¶
func (m *ProviderHealthMonitor) Stop()
Stop halts the background monitoring. Safe to call multiple times.
type ProviderHealthState ¶
type ProviderHealthState struct {
LastProbe time.Time
Status string // "healthy", "degraded", "unavailable"
Latency time.Duration // Exponential moving average
SuccessRate float64 // Sliding window approximation
QuotaRemaining float64 // 0.0 - 1.0
ConsecutiveFail int
CoolingDown bool
CooldownUntil time.Time
// CooldownAttempts counts how many times this provider has tripped the
// circuit. Used to compute exponential backoff for the next cooldown.
// Resets to 0 after a successful request that follows a cooldown.
CooldownAttempts int
}
ProviderHealthState tracks the live latency, availability, and quota health of a specific provider.
type ProviderProber ¶
type ProviderProber interface {
// Name returns the provider name (e.g., "geminicli")
Name() string
// Probe executes the health/discovery check
Probe(ctx context.Context) ProbeResult
}
ProviderProber defines the interface for probing a specific backend.
type ProviderScorer ¶
type ProviderScorer struct {
// contains filtered or unexported fields
}
ProviderScorer implements the composite scoring algorithm.
func NewProviderScorer ¶
func NewProviderScorer(cfg Config) *ProviderScorer
NewProviderScorer creates a scorer with the given configuration.
func (*ProviderScorer) GetWeights ¶
func (s *ProviderScorer) GetWeights() ScoringWeights
GetWeights returns a snapshot of the current scoring weights (thread-safe).
func (*ProviderScorer) ScoreAll ¶
func (s *ProviderScorer) ScoreAll(candidates []CandidateInput, complexity float64) []ScoredCandidate
ScoreAll evaluates and sorts all candidates, returning scored results. The complexity parameter (0.0-1.0) drives the conservation multiplier.
func (*ProviderScorer) SetWeights ¶
func (s *ProviderScorer) SetWeights(w ScoringWeights)
SetWeights atomically replaces the scoring weights (thread-safe).
type ProviderTierConfig ¶
type ProviderTierConfig struct {
Tier string `yaml:"tier" json:"tier"`
MonthlyBudget float64 `yaml:"monthly-budget,omitempty" json:"monthly_budget,omitempty"`
ModelTiers map[string]string `yaml:"model-tiers,omitempty" json:"model_tiers,omitempty"`
}
ProviderTierConfig defines fixed constraints and tier allocations for a provider.
type RQSWeightConfig ¶
type RQSWeightConfig struct {
Success float64
Latency float64
Efficiency float64
Conservation float64
}
RQSWeightConfig defines the importance of different factors in the Routing Quality Score (RQS). This is the master "val_bpb" metric.
type RateLimitHeaderConfig ¶
type RateLimitHeaderConfig struct {
RequestLimit string
RequestRemaining string
RequestReset string
TokenLimit string
TokenRemaining string
TokenReset string
RetryAfter string
}
RateLimitHeaderConfig maps provider-specific HTTP header names to rate-limit semantics. Each provider uses slightly different header names, so we normalise them into a common struct.
type RateLimitSnapshot ¶
type RateLimitSnapshot struct {
Provider string
// Request-level limits
RequestLimit int
RequestRemaining int
RequestReset time.Time
// Token-level limits
TokenLimit int
TokenRemaining int
TokenReset time.Time
// Retry-After (seconds or absolute time)
RetryAfterSec int
// Derived health metric (0.0 = exhausted, 1.0 = full)
QuotaHealth float64
// Whether any rate-limit headers were detected at all
Detected bool
}
RateLimitSnapshot is the normalised quota state extracted from a single HTTP response.
func ParseRateLimitHeaders ¶
func ParseRateLimitHeaders(provider string, headers http.Header) RateLimitSnapshot
ParseRateLimitHeaders extracts rate-limit quota data from an HTTP response according to the provider's known header scheme. If no relevant headers are found, Detected will be false.
type RateLimits ¶
RateLimits holds quota limits parsed from HTTP headers.
type RequestOutcome ¶
type RequestOutcome struct {
Timestamp time.Time
Model string
Provider string
Tier string
Latency time.Duration
Success bool
StatusCode int
EstimatedComplexity float64
TokensUsed int // If available
}
RequestOutcome represents the real-world result of a routing decision.
type RoutingDecision ¶
type RoutingDecision struct {
// SelectedModel is the primary model chosen for the request
SelectedModel string `json:"selected_model"`
// FallbackChain is an ordered list of models to try if the primary fails
FallbackChain []FallbackEntry `json:"fallback_chain"`
// Intent hints or classifications (e.g., "coding", "creative")
Intent string `json:"intent"`
// EstimatedComplexity of the prompt (0.0 to 1.0)
EstimatedComplexity float64 `json:"estimated_complexity"`
// Candidates evaluated (useful for debugging/dashboard)
Candidates []ScoredCandidate `json:"candidates,omitempty"`
// OriginalInputs preserves the raw CandidateInput values at decision time,
// enabling the Lab to replay exact conditions for shadow scoring (H1 fix).
OriginalInputs []CandidateInput `json:"-"`
// ResolutionLatency tracks how long the scoring process took
ResolutionLatency time.Duration `json:"resolution_latency_ms"`
}
RoutingDecision represents the final selected strategy determined by the AutoResolver.
func (*RoutingDecision) String ¶
func (d *RoutingDecision) String() string
String returns a human-readable summary of the routing decision.
type RoutingRequest ¶
type RoutingRequest struct {
// Content is the user's message text (used for complexity estimation)
Content string
// IntentHint is an explicit intent from the model name (e.g., "auto:coding" → "coding")
IntentHint string
// AvailableModels is the list of candidate models (as CandidateInput health records) currently registered
AvailableModels []CandidateInput
}
RoutingRequest encapsulates everything the resolver needs to make a decision.
type ScoredCandidate ¶
type ScoredCandidate struct {
// Model is the canonical internal model identifier (e.g., "geminicli:gemini-3.1-pro")
Model string `json:"model"`
// Provider is the extracted provider name (e.g., "geminicli")
Provider string `json:"provider"`
// Score Breakdown
FinalScore float64 `json:"final_score"`
BaseScore float64 `json:"base_score"`
TierBoost float64 `json:"tier_boost"`
PreferenceBoost float64 `json:"preference_boost"`
ConservationMult float64 `json:"conservation_multiplier"`
// EffectiveTier is the final assigned tier (e.g., "premium", "free") after resolving overrides
EffectiveTier string `json:"effective_tier"`
// Available tracks if the health monitor considers this candidate routable
Available bool `json:"available"`
// EstimatedLatency via health monitor EMAs
EstimatedLatency time.Duration `json:"estimated_latency_ms"`
}
ScoredCandidate represents a model that has been evaluated and scored by the ProviderScorer.
type ScoringWeights ¶
type ScoringWeights struct {
Availability float64 `yaml:"availability" json:"availability"`
Quota float64 `yaml:"quota" json:"quota"`
Latency float64 `yaml:"latency" json:"latency"`
SuccessRate float64 `yaml:"success-rate" json:"success_rate"`
}
ScoringWeights represent the importance of different health metrics.
type SubscriptionInfo ¶
type SubscriptionInfo struct {
Tier string // "free", "standard", "premium", "unknown"
QuotaTotal int64 // -1 = unlimited
QuotaRemaining int64
ResetTime time.Time
Source string // "api-header", "cli-command", "inferred", "config-override"
}
SubscriptionInfo holds tier details detected from probes.