Documentation
¶
Overview ¶
Package model provides a unified model registry for centralized endpoint configuration, capability-based routing, and tool capability metadata.
Index ¶
- type CapabilityConfig
- type DefaultsConfig
- type EndpointConfig
- type Registry
- func (r *Registry) GetDefault() string
- func (r *Registry) GetEndpoint(name string) *EndpointConfig
- func (r *Registry) GetFallbackChain(capability string) []string
- func (r *Registry) GetMaxTokens(name string) int
- func (r *Registry) ListCapabilities() []string
- func (r *Registry) ListEndpoints() []string
- func (r *Registry) Resolve(capability string) string
- func (r *Registry) ResolveSummarization() string
- func (r *Registry) Validate() error
- type RegistryReader
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type CapabilityConfig ¶
// CapabilityConfig defines model preferences for a capability.
type CapabilityConfig struct {
// Description explains what this capability is for.
Description string `json:"description,omitempty"`
// Preferred lists endpoint names in order of preference.
Preferred []string `json:"preferred"`
// Fallback lists backup endpoint names if all preferred are unavailable.
Fallback []string `json:"fallback,omitempty"`
// RequiresTools filters the chain to only tool-capable endpoints.
RequiresTools bool `json:"requires_tools,omitempty"`
}
CapabilityConfig defines model preferences for a capability.
type DefaultsConfig ¶
// DefaultsConfig holds default model settings.
type DefaultsConfig struct {
// Model is the default endpoint name when no capability matches.
Model string `json:"model"`
// Capability is the default capability when none specified.
Capability string `json:"capability,omitempty"`
}
DefaultsConfig holds default model settings.
type EndpointConfig ¶
// EndpointConfig defines an available model endpoint.
type EndpointConfig struct {
// Provider identifies the API type: "anthropic", "ollama", "openai", "openrouter".
Provider string `json:"provider"`
// URL is the API endpoint. Required for ollama/openai/openrouter, optional for anthropic.
URL string `json:"url,omitempty"`
// Model is the model identifier sent to the provider.
Model string `json:"model"`
// MaxTokens is the context window size in tokens.
MaxTokens int `json:"max_tokens"`
// SupportsTools indicates whether this endpoint supports function/tool calling.
SupportsTools bool `json:"supports_tools,omitempty"`
// ToolFormat specifies the tool calling format: "anthropic" or "openai".
// Empty means auto-detect from provider.
ToolFormat string `json:"tool_format,omitempty"`
// APIKeyEnv is the environment variable containing the API key.
// Required for anthropic/openai/openrouter, ignored for ollama.
APIKeyEnv string `json:"api_key_env,omitempty"`
// Options holds provider-specific template parameters passed to the API
// as chat_template_kwargs. For vLLM/SGLang with thinking models, set
// "enable_thinking" and "thinking_budget" here.
//
// Note: Ollama's OpenAI-compatible endpoint ignores chat_template_kwargs.
// Ollama thinking models (Qwen3, DeepSeek-R1) always return reasoning_content
// but the thinking toggle and budget are only controllable via Ollama's
// native /api/chat endpoint, not the OpenAI-compatible /v1/ endpoint.
//
// Do not use for inference parameters (temperature, top_k, etc.) which
// have dedicated fields in AgentRequest.
Options map[string]any `json:"options,omitempty"`
// Stream enables SSE streaming for this endpoint. The client uses
// CreateChatCompletionStream internally, reducing time-to-first-token.
// The inter-component protocol remains complete AgentResponse messages.
Stream bool `json:"stream,omitempty"`
// ReasoningEffort controls how much effort reasoning models spend thinking.
// Accepted values: "none" (Gemini only), "low", "medium", "high".
// Empty means the provider default is used. Forwarded as reasoning_effort
// on the OpenAI-compatible chat completions request.
ReasoningEffort string `json:"reasoning_effort,omitempty"`
// InputPricePer1MTokens is the cost per 1M input tokens in USD.
// Consumers join this with token usage data to calculate costs.
InputPricePer1MTokens float64 `json:"input_price_per_1m_tokens,omitempty"`
// OutputPricePer1MTokens is the cost per 1M output tokens in USD.
// Consumers join this with token usage data to calculate costs.
OutputPricePer1MTokens float64 `json:"output_price_per_1m_tokens,omitempty"`
// RequestsPerMinute limits the rate of requests to this endpoint.
// 0 means no rate limiting. Applied per-endpoint across all consumers.
RequestsPerMinute int `json:"requests_per_minute,omitempty"`
// MaxConcurrent limits concurrent in-flight requests to this endpoint.
// 0 means no concurrency limit.
MaxConcurrent int `json:"max_concurrent,omitempty"`
}
EndpointConfig defines an available model endpoint.
type Registry ¶
// Registry holds all model endpoint definitions and capability routing.
// It is JSON-serializable for config loading and implements RegistryReader.
type Registry struct {
// Capabilities maps capability names to their routing preferences.
Capabilities map[string]*CapabilityConfig `json:"capabilities,omitempty"`
// Endpoints maps endpoint names to their configurations.
Endpoints map[string]*EndpointConfig `json:"endpoints"`
// Defaults holds default model settings used when no capability matches.
Defaults DefaultsConfig `json:"defaults"`
}
Registry holds all model endpoint definitions and capability routing. It is JSON-serializable for config loading and implements RegistryReader.
func (*Registry) GetDefault ¶
func (r *Registry) GetDefault() string
GetDefault returns the default endpoint name.
func (*Registry) GetEndpoint ¶
func (r *Registry) GetEndpoint(name string) *EndpointConfig
GetEndpoint returns the endpoint configuration for a name, or nil if not found.
func (*Registry) GetFallbackChain ¶
func (r *Registry) GetFallbackChain(capability string) []string
GetFallbackChain returns all endpoint names for a capability in preference order.
func (*Registry) GetMaxTokens ¶
func (r *Registry) GetMaxTokens(name string) int
GetMaxTokens returns the context window size for an endpoint name.
func (*Registry) ListCapabilities ¶
func (r *Registry) ListCapabilities() []string
ListCapabilities returns all configured capability names sorted alphabetically.
func (*Registry) ListEndpoints ¶
func (r *Registry) ListEndpoints() []string
ListEndpoints returns all configured endpoint names sorted alphabetically.
func (*Registry) Resolve ¶
func (r *Registry) Resolve(capability string) string
Resolve returns the preferred endpoint name for a capability.
func (*Registry) ResolveSummarization ¶
func (r *Registry) ResolveSummarization() string
ResolveSummarization returns the endpoint name best suited for summarization.
func (*Registry) Validate ¶
func (r *Registry) Validate() error
Validate checks the registry configuration and returns an error if it is invalid.
type RegistryReader ¶
// RegistryReader provides read-only access to the model registry.
// Components receive this interface via Dependencies.
type RegistryReader interface {
// Resolve returns the preferred endpoint name for a capability.
// Returns the first endpoint in the preferred list.
// If RequiresTools is set, filters to tool-capable endpoints.
Resolve(capability string) string
// GetFallbackChain returns all endpoint names for a capability in preference order.
// Includes both preferred and fallback endpoints.
GetFallbackChain(capability string) []string
// GetEndpoint returns the full endpoint configuration for an endpoint name.
// Returns nil if the endpoint is not configured.
GetEndpoint(name string) *EndpointConfig
// GetMaxTokens returns the context window size for an endpoint name.
// Returns 0 if the endpoint is not configured.
GetMaxTokens(name string) int
// GetDefault returns the default endpoint name.
GetDefault() string
// ListCapabilities returns all configured capability names sorted alphabetically.
ListCapabilities() []string
// ListEndpoints returns all configured endpoint names sorted alphabetically.
ListEndpoints() []string
// ResolveSummarization returns the endpoint name to use for context summarization.
// Resolution order:
// 1. Explicit "summarization" capability if configured
// 2. Endpoint with the largest MaxTokens (best suited for long context summarization)
// 3. The default endpoint as final fallback
ResolveSummarization() string
}
RegistryReader provides read-only access to the model registry. Components receive this interface via Dependencies.