Documentation
¶
Overview ¶
Package model provides a unified model registry for centralized endpoint configuration, capability-based routing, and tool capability metadata.
Index ¶
- type CapabilityConfig
- type DefaultsConfig
- type EndpointConfig
- type Registry
- func (r *Registry) GetDefault() string
- func (r *Registry) GetEndpoint(name string) *EndpointConfig
- func (r *Registry) GetFallbackChain(capability string) []string
- func (r *Registry) GetMaxTokens(name string) int
- func (r *Registry) ListCapabilities() []string
- func (r *Registry) ListEndpoints() []string
- func (r *Registry) Resolve(capability string) string
- func (r *Registry) ResolveSummarization() string
- func (r *Registry) Validate() error
- type RegistryReader
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type CapabilityConfig ¶
// CapabilityConfig defines model preferences for a capability.
type CapabilityConfig struct {
// Description explains what this capability is for.
Description string `json:"description,omitempty"`
// Preferred lists endpoint names in order of preference.
Preferred []string `json:"preferred"`
// Fallback lists backup endpoint names if all preferred are unavailable.
Fallback []string `json:"fallback,omitempty"`
// RequiresTools filters the chain to only tool-capable endpoints.
RequiresTools bool `json:"requires_tools,omitempty"`
}
CapabilityConfig defines model preferences for a capability.
type DefaultsConfig ¶
// DefaultsConfig holds default model settings.
type DefaultsConfig struct {
// Model is the default endpoint name when no capability matches.
Model string `json:"model"`
// Capability is the default capability when none specified.
Capability string `json:"capability,omitempty"`
}
DefaultsConfig holds default model settings.
type EndpointConfig ¶
// EndpointConfig defines an available model endpoint.
type EndpointConfig struct {
// Provider identifies the API type: "anthropic", "ollama", "openai", "openrouter".
Provider string `json:"provider"`
// URL is the API endpoint. Required for ollama/openai/openrouter, optional for anthropic.
URL string `json:"url,omitempty"`
// Model is the model identifier sent to the provider.
Model string `json:"model"`
// MaxTokens is the context window size in tokens.
MaxTokens int `json:"max_tokens"`
// SupportsTools indicates whether this endpoint supports function/tool calling.
SupportsTools bool `json:"supports_tools,omitempty"`
// ToolFormat specifies the tool calling format: "anthropic" or "openai".
// Empty means auto-detect from provider.
ToolFormat string `json:"tool_format,omitempty"`
// APIKeyEnv is the environment variable containing the API key.
// Required for anthropic/openai/openrouter, ignored for ollama.
APIKeyEnv string `json:"api_key_env,omitempty"`
// Options holds provider-specific template parameters passed to the API
// as chat_template_kwargs. For vLLM/SGLang with thinking models, set
// "enable_thinking" and "thinking_budget" here.
//
// Note: Ollama's OpenAI-compatible endpoint ignores chat_template_kwargs.
// Ollama thinking models (Qwen3, DeepSeek-R1) always return reasoning_content
// but the thinking toggle and budget are only controllable via Ollama's
// native /api/chat endpoint, not the OpenAI-compatible /v1/ endpoint.
//
// Do not use for inference parameters (temperature, top_k, etc.) which
// have dedicated fields in AgentRequest.
Options map[string]any `json:"options,omitempty"`
// Stream enables SSE streaming for this endpoint. The client uses
// CreateChatCompletionStream internally, reducing time-to-first-token.
// The inter-component protocol remains complete AgentResponse messages.
Stream bool `json:"stream,omitempty"`
// ReasoningEffort controls how much effort reasoning models spend thinking.
// Accepted values: "none" (Gemini only), "low", "medium", "high".
// Empty means the provider default is used. Forwarded as reasoning_effort
// on the OpenAI-compatible chat completions request.
ReasoningEffort string `json:"reasoning_effort,omitempty"`
// InputPricePer1MTokens is the cost per 1M input tokens in USD.
// Consumers join this with token usage data to calculate costs.
InputPricePer1MTokens float64 `json:"input_price_per_1m_tokens,omitempty"`
// OutputPricePer1MTokens is the cost per 1M output tokens in USD.
// Consumers join this with token usage data to calculate costs.
OutputPricePer1MTokens float64 `json:"output_price_per_1m_tokens,omitempty"`
// RequestsPerMinute limits the rate of requests to this endpoint.
// 0 means no rate limiting. Applied per-endpoint across all consumers.
RequestsPerMinute int `json:"requests_per_minute,omitempty"`
// MaxConcurrent limits concurrent in-flight requests to this endpoint.
// 0 means no concurrency limit.
MaxConcurrent int `json:"max_concurrent,omitempty"`
}
EndpointConfig defines an available model endpoint.
type Registry ¶
// Registry holds all model endpoint definitions and capability routing.
// It is JSON-serializable for config loading and implements RegistryReader.
type Registry struct {
// Capabilities maps capability names to their routing preferences.
Capabilities map[string]*CapabilityConfig `json:"capabilities,omitempty"`
// Endpoints maps endpoint names to their configurations.
Endpoints map[string]*EndpointConfig `json:"endpoints"`
// Defaults holds default model settings used when no capability matches.
Defaults DefaultsConfig `json:"defaults"`
}
Registry holds all model endpoint definitions and capability routing. It is JSON-serializable for config loading and implements RegistryReader.
func (*Registry) GetDefault ¶
func (r *Registry) GetDefault() string
GetDefault returns the default endpoint name.
func (*Registry) GetEndpoint ¶
func (r *Registry) GetEndpoint(name string) *EndpointConfig
GetEndpoint returns the endpoint configuration for a name, or nil if not found.
func (*Registry) GetFallbackChain ¶
func (r *Registry) GetFallbackChain(capability string) []string
GetFallbackChain returns all endpoint names for a capability in preference order.
func (*Registry) GetMaxTokens ¶
func (r *Registry) GetMaxTokens(name string) int
GetMaxTokens returns the context window size for an endpoint name.
func (*Registry) ListCapabilities ¶
func (r *Registry) ListCapabilities() []string
ListCapabilities returns all configured capability names sorted alphabetically.
func (*Registry) ListEndpoints ¶
func (r *Registry) ListEndpoints() []string
ListEndpoints returns all configured endpoint names sorted alphabetically.
func (*Registry) Resolve ¶
func (r *Registry) Resolve(capability string) string
Resolve returns the preferred endpoint name for a capability.
func (*Registry) ResolveSummarization ¶
func (r *Registry) ResolveSummarization() string
ResolveSummarization returns the endpoint name best suited for summarization.
func (*Registry) Validate ¶
func (r *Registry) Validate() error
Validate checks the registry configuration and returns an error if it is invalid.
type RegistryReader ¶
// RegistryReader provides read-only access to the model registry.
// Components receive this interface via Dependencies.
type RegistryReader interface {
// Resolve returns the preferred endpoint name for a capability.
// Returns the first endpoint in the preferred list.
// If RequiresTools is set, filters to tool-capable endpoints.
Resolve(capability string) string
// GetFallbackChain returns all endpoint names for a capability in preference order.
// Includes both preferred and fallback endpoints.
GetFallbackChain(capability string) []string
// GetEndpoint returns the full endpoint configuration for an endpoint name.
// Returns nil if the endpoint is not configured.
GetEndpoint(name string) *EndpointConfig
// GetMaxTokens returns the context window size for an endpoint name.
// Returns 0 if the endpoint is not configured.
GetMaxTokens(name string) int
// GetDefault returns the default endpoint name.
GetDefault() string
// ListCapabilities returns all configured capability names sorted alphabetically.
ListCapabilities() []string
// ListEndpoints returns all configured endpoint names sorted alphabetically.
ListEndpoints() []string
// ResolveSummarization returns the endpoint name to use for context summarization.
// Resolution order:
// 1. Explicit "summarization" capability if configured
// 2. Endpoint with the largest MaxTokens (best suited for long context summarization)
// 3. The default endpoint as final fallback
ResolveSummarization() string
}
RegistryReader provides read-only access to the model registry. Components receive this interface via Dependencies.