Documentation
¶
Overview ¶
Package model provides model integration for Orla Agent Mode (RFC 4). It defines the Provider interface and its implementations, and includes testing support: MockProvider, a mock Provider implementation, and MockLLMServer, an HTTP server that speaks the OpenAI-compatible chat API.
Index ¶
- func ParseModelIdentifier(modelID string) (provider, modelName string, err error)
- func RegisterProviderFactory(providerName string, factory ProviderFactory)
- type ContentEvent
- type InferenceOptions
- type Message
- type MessageRole
- type MockLLMServer
- type MockLLMServerBuilder
- func (b *MockLLMServerBuilder) ReturnContent(content string) *MockLLMServerBuilder
- func (b *MockLLMServerBuilder) ReturnNoChoices() *MockLLMServerBuilder
- func (b *MockLLMServerBuilder) ReturnStreamChunks(chunks []string) *MockLLMServerBuilder
- func (b *MockLLMServerBuilder) ReturnStreamWithToolCalls(content string, toolCalls ...mockLLMToolCall) *MockLLMServerBuilder
- func (b *MockLLMServerBuilder) ReturnToolCall(name, argsJSON string) *MockLLMServerBuilder
- func (b *MockLLMServerBuilder) ReturnToolCallWithID(id, name, argsJSON string) *MockLLMServerBuilder
- func (b *MockLLMServerBuilder) Start() *MockLLMServer
- type MockProvider
- func (m *MockProvider) CallCount() int
- func (m *MockProvider) Chat(ctx context.Context, messages []Message, tools []*mcp.Tool, ...) (*Response, <-chan StreamEvent, error)
- func (m *MockProvider) EnsureReady(ctx context.Context) error
- func (m *MockProvider) LastInferenceOptions() *InferenceOptions
- func (m *MockProvider) Name() string
- func (m *MockProvider) ReceivedMessages() [][]Message
- type MockProviderBuilder
- func (b *MockProviderBuilder) Build() *MockProvider
- func (b *MockProviderBuilder) WithChatError(err error) *MockProviderBuilder
- func (b *MockProviderBuilder) WithChatFunc(...) *MockProviderBuilder
- func (b *MockProviderBuilder) WithContent(content string) *MockProviderBuilder
- func (b *MockProviderBuilder) WithEnsureReadyError(err error) *MockProviderBuilder
- func (b *MockProviderBuilder) WithName(name string) *MockProviderBuilder
- func (b *MockProviderBuilder) WithStreamChunks(chunks []string) *MockProviderBuilder
- func (b *MockProviderBuilder) WithToolCall(name, argsJSON string) *MockProviderBuilder
- type OpenAIProvider
- type Provider
- type ProviderFactory
- type RequestSchedulingPolicy
- type Response
- type ResponseMetrics
- type SchedulingHints
- type SchedulingPolicy
- type StreamEvent
- type StreamEventType
- type StreamWriter
- type StructuredOutputOptions
- type ThinkingEvent
- type ToolCallEvent
- type ToolCallWithID
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func ParseModelIdentifier ¶
ParseModelIdentifier parses a model identifier string (e.g., "openai:llama3") and returns the provider name and model name.
func RegisterProviderFactory ¶
func RegisterProviderFactory(providerName string, factory ProviderFactory)
RegisterProviderFactory registers a provider factory by provider name (e.g. "openai").
Types ¶
type ContentEvent ¶
type ContentEvent struct {
Content string
}
ContentEvent represents a content chunk in the stream
func (*ContentEvent) Type ¶
func (e *ContentEvent) Type() StreamEventType
type InferenceOptions ¶
type InferenceOptions struct {
// Stream is whether to stream the response. The zero value (false) means no streaming.
Stream bool `json:"stream,omitempty"`
// MaxTokens is the maximum number of tokens to generate. A nil value means use the backend default.
MaxTokens *int `json:"max_tokens,omitempty"`
// Temperature is the temperature parameter for sampling. A nil value means use the backend default.
Temperature *float64 `json:"temperature,omitempty"`
// TopP is the nucleus sampling top_p parameter. A nil value means use the backend default.
TopP *float64 `json:"top_p,omitempty"`
// ResponseFormat is the structured output options. A nil value means no structured output.
ResponseFormat *StructuredOutputOptions `json:"response_format,omitempty"`
// ChatTemplateKwargs are extra kwargs passed to the chat template renderer
ChatTemplateKwargs map[string]any `json:"chat_template_kwargs,omitempty"`
// SchedulingPolicy selects stage-level backend queue scheduling behavior.
SchedulingPolicy SchedulingPolicy `json:"scheduling_policy,omitempty"`
// RequestSchedulingPolicy selects request-level ordering within a stage queue.
RequestSchedulingPolicy RequestSchedulingPolicy `json:"request_scheduling_policy,omitempty"`
// SchedulingHints are optional policy hints for backend queueing.
SchedulingHints *SchedulingHints `json:"scheduling_hints,omitempty"`
// ReasoningEffort controls thinking for reasoning-capable models ("high", "medium", "low", "none").
ReasoningEffort string `json:"reasoning_effort,omitempty"`
}
InferenceOptions holds per-request inference settings (agent profile knobs). JSON tags match the execute API so the server can embed this in ExecuteRequest.
func (InferenceOptions) GetSchedulingPolicy ¶
func (o InferenceOptions) GetSchedulingPolicy() SchedulingPolicy
GetSchedulingPolicy returns the configured scheduling policy or the FCFS default.
type Message ¶
type Message struct {
// Role of the message
Role MessageRole `json:"role"`
// Content of the message
Content string `json:"content"`
// ToolName is used by the Ollama API
ToolName string `json:"tool_name,omitempty"`
// ToolCallID is used by the OpenAI API and vLLM
ToolCallID string `json:"tool_call_id,omitempty"`
// ToolCalls carries the tool calls from an assistant message so they can be
// replayed in the conversation history on the next turn.
ToolCalls []ToolCallWithID `json:"tool_calls,omitempty"`
}
Message represents a chat message in a conversation. For role "tool" (i.e., tool call results), set both ToolCallID and ToolName when building messages. The OpenAI API uses ToolCallID and the Ollama API uses ToolName. The providers ignore the field they do not need.
type MessageRole ¶
type MessageRole string
const ( MessageRoleUser MessageRole = "user" MessageRoleAssistant MessageRole = "assistant" MessageRoleSystem MessageRole = "system" MessageRoleTool MessageRole = "tool" )
func (MessageRole) String ¶
func (r MessageRole) String() string
type MockLLMServer ¶
type MockLLMServer struct {
// contains filtered or unexported fields
}
MockLLMServer is an HTTP server that speaks the OpenAI-compatible chat API. Used for integration tests without real API keys or external backends.
func (*MockLLMServer) LastRequestBody ¶
func (s *MockLLMServer) LastRequestBody() []byte
LastRequestBody returns a copy of the raw request body from the most recent chat request. Tests can json.Unmarshal into openai.ChatCompletionRequest to assert on request fields.
type MockLLMServerBuilder ¶
type MockLLMServerBuilder struct {
// contains filtered or unexported fields
}
MockLLMServerBuilder builds a MockLLMServer with a fluent API.
func NewMockLLMServer ¶
func NewMockLLMServer() *MockLLMServerBuilder
NewMockLLMServer returns a new MockLLMServerBuilder.
func (*MockLLMServerBuilder) ReturnContent ¶
func (b *MockLLMServerBuilder) ReturnContent(content string) *MockLLMServerBuilder
ReturnContent sets the response content for non-streaming.
func (*MockLLMServerBuilder) ReturnNoChoices ¶
func (b *MockLLMServerBuilder) ReturnNoChoices() *MockLLMServerBuilder
ReturnNoChoices configures the server to return an empty choices array (triggers "no choices" error).
func (*MockLLMServerBuilder) ReturnStreamChunks ¶
func (b *MockLLMServerBuilder) ReturnStreamChunks(chunks []string) *MockLLMServerBuilder
ReturnStreamChunks sets streaming response chunks.
func (*MockLLMServerBuilder) ReturnStreamWithToolCalls ¶
func (b *MockLLMServerBuilder) ReturnStreamWithToolCalls(content string, toolCalls ...mockLLMToolCall) *MockLLMServerBuilder
ReturnStreamWithToolCalls configures streaming response with content followed by tool calls.
func (*MockLLMServerBuilder) ReturnToolCall ¶
func (b *MockLLMServerBuilder) ReturnToolCall(name, argsJSON string) *MockLLMServerBuilder
ReturnToolCall adds a tool call to the response. argsJSON is the JSON-encoded arguments.
func (*MockLLMServerBuilder) ReturnToolCallWithID ¶
func (b *MockLLMServerBuilder) ReturnToolCallWithID(id, name, argsJSON string) *MockLLMServerBuilder
ReturnToolCallWithID adds a tool call with a specific ID.
func (*MockLLMServerBuilder) Start ¶
func (b *MockLLMServerBuilder) Start() *MockLLMServer
Start starts the server and returns it. Call Close when done.
type MockProvider ¶
type MockProvider struct {
// contains filtered or unexported fields
}
MockProvider is a mock implementation of Provider for testing. Supports both builder-based configuration and function-based customization.
func (*MockProvider) CallCount ¶
func (m *MockProvider) CallCount() int
CallCount returns the number of times Chat has been called.
func (*MockProvider) Chat ¶
func (m *MockProvider) Chat(ctx context.Context, messages []Message, tools []*mcp.Tool, opts InferenceOptions) (*Response, <-chan StreamEvent, error)
Chat sends a chat request and returns the configured or custom response.
func (*MockProvider) EnsureReady ¶
func (m *MockProvider) EnsureReady(ctx context.Context) error
EnsureReady returns nil or the configured error.
func (*MockProvider) LastInferenceOptions ¶
func (m *MockProvider) LastInferenceOptions() *InferenceOptions
LastInferenceOptions returns the options from the most recent Chat call.
func (*MockProvider) ReceivedMessages ¶
func (m *MockProvider) ReceivedMessages() [][]Message
ReceivedMessages returns a copy of all message slices passed to Chat.
type MockProviderBuilder ¶
type MockProviderBuilder struct {
// contains filtered or unexported fields
}
MockProviderBuilder builds a MockProvider with a fluent API.
func NewMockProvider ¶
func NewMockProvider() *MockProviderBuilder
NewMockProvider returns a new MockProviderBuilder.
func (*MockProviderBuilder) Build ¶
func (b *MockProviderBuilder) Build() *MockProvider
Build returns the configured MockProvider.
func (*MockProviderBuilder) WithChatError ¶
func (b *MockProviderBuilder) WithChatError(err error) *MockProviderBuilder
WithChatError configures the provider to return an error from Chat.
func (*MockProviderBuilder) WithChatFunc ¶
func (b *MockProviderBuilder) WithChatFunc(fn func(ctx context.Context, messages []Message, tools []*mcp.Tool, opts InferenceOptions) (*Response, <-chan StreamEvent, error)) *MockProviderBuilder
WithChatFunc sets a custom Chat implementation (overrides builder-configured response).
func (*MockProviderBuilder) WithContent ¶
func (b *MockProviderBuilder) WithContent(content string) *MockProviderBuilder
WithContent sets the response content (non-streaming).
func (*MockProviderBuilder) WithEnsureReadyError ¶
func (b *MockProviderBuilder) WithEnsureReadyError(err error) *MockProviderBuilder
WithEnsureReadyError configures the provider to return an error from EnsureReady.
func (*MockProviderBuilder) WithName ¶
func (b *MockProviderBuilder) WithName(name string) *MockProviderBuilder
WithName sets the provider name.
func (*MockProviderBuilder) WithStreamChunks ¶
func (b *MockProviderBuilder) WithStreamChunks(chunks []string) *MockProviderBuilder
WithStreamChunks configures streaming response with the given chunks.
func (*MockProviderBuilder) WithToolCall ¶
func (b *MockProviderBuilder) WithToolCall(name, argsJSON string) *MockProviderBuilder
WithToolCall adds a tool call to the response. argsJSON is the JSON-encoded arguments.
type OpenAIProvider ¶
type OpenAIProvider struct {
// contains filtered or unexported fields
}
OpenAIProvider implements the Provider interface for OpenAI-compatible APIs. This provider is intended to work with any server that implements the OpenAI Chat Completions API format such as LM Studio, vLLM, and Ollama. For Ollama, use endpoint http://host:11434/v1 [1]. [1] https://docs.ollama.com/api/openai-compatibility
func NewOpenAIProvider ¶
func NewOpenAIProvider(modelName string, llmBackend *core.LLMBackend) (*OpenAIProvider, error)
NewOpenAIProvider creates a new OpenAI-compatible provider. This works with any server that implements the OpenAI Chat Completions API format.
func (*OpenAIProvider) Chat ¶
func (p *OpenAIProvider) Chat(ctx context.Context, messages []Message, tools []*mcp.Tool, opts InferenceOptions) (*Response, <-chan StreamEvent, error)
Chat sends a chat request to the OpenAI-compatible API. This works with any server implementing the OpenAI Chat Completions API format.
func (*OpenAIProvider) EnsureReady ¶
func (p *OpenAIProvider) EnsureReady(ctx context.Context) error
EnsureReady is a no-op for the OpenAI-compatible provider.
type Provider ¶
type Provider interface {
// Name returns the provider name (e.g., "openai", "anthropic")
Name() string
// Chat sends a chat request to the model with the given inference options and returns the response.
Chat(ctx context.Context, messages []Message, tools []*mcp.Tool, opts InferenceOptions) (*Response, <-chan StreamEvent, error)
// EnsureReady ensures the model provider is ready (e.g., starts Ollama if needed)
// Returns an error if the provider cannot be made ready
EnsureReady(ctx context.Context) error
}
Provider is the interface that all model providers must implement
func NewProvider ¶
func NewProvider(cfg *config.OrlaConfig) (Provider, error)
NewProvider creates a new model provider based on the configuration
func NewProviderFromBackend ¶
func NewProviderFromBackend(backend *core.LLMBackend, modelID string) (Provider, error)
NewProviderFromBackend creates a new model provider from a backend and model identifier. This is the programmatic entry point used by the serving layer.
type ProviderFactory ¶
type ProviderFactory func(modelName string, backend *core.LLMBackend, cfg *config.OrlaConfig) (Provider, error)
ProviderFactory creates a provider for a parsed model name and backend context.
type RequestSchedulingPolicy ¶
type RequestSchedulingPolicy string
RequestSchedulingPolicy controls how requests within a single stage queue are ordered.
const ( // RequestSchedulingPolicyFIFO processes requests in arrival order (default). RequestSchedulingPolicyFIFO RequestSchedulingPolicy = "fifo" // RequestSchedulingPolicyPriority processes the highest-priority request first. RequestSchedulingPolicyPriority RequestSchedulingPolicy = "priority" )
type Response ¶
type Response struct {
Content string `json:"content"` // Text content from the model
Thinking string `json:"thinking"` // Thinking trace from the model (if supported)
ToolCalls []ToolCallWithID `json:"tool_calls"` // Tool calls requested by the model
Metrics *ResponseMetrics `json:"metrics"` // Response metrics
}
Response represents a model response
type ResponseMetrics ¶
type ResponseMetrics struct {
// TTFTMs is time to first token in milliseconds. Only set when task was executed with streaming.
TTFTMs int64 `json:"ttft_ms,omitempty"`
// TPOTMs is time per output token in milliseconds. Only set when task was executed with streaming.
TPOTMs int64 `json:"tpot_ms,omitempty"`
// PromptTokens is the number of tokens in the prompt (input). Reported by the backend.
PromptTokens int `json:"prompt_tokens,omitempty"`
// CompletionTokens is the number of tokens generated (output). Reported by the backend.
CompletionTokens int `json:"completion_tokens,omitempty"`
// QueueWaitMs is the time spent waiting in Orla's backend scheduler queue.
QueueWaitMs int64 `json:"queue_wait_ms,omitempty"`
// SchedulerDecisionMs is the time spent selecting the next request in the scheduler.
SchedulerDecisionMs int64 `json:"scheduler_decision_ms,omitempty"`
// DispatchMs is the request dispatch/setup time between scheduler dequeue and provider return.
DispatchMs int64 `json:"dispatch_ms,omitempty"`
// BackendLatencyMs is end-to-end backend latency for non-streaming calls.
BackendLatencyMs int64 `json:"backend_latency_ms,omitempty"`
}
type SchedulingHints ¶
type SchedulingHints struct {
// Priority is optional and used by priority-based scheduling policies.
Priority *int `json:"priority,omitempty"`
}
SchedulingHints are optional policy-specific hints attached to an inference request.
func (*SchedulingHints) GetPriority ¶
func (h *SchedulingHints) GetPriority() int
GetPriority returns the priority score or 0 when unset.
type SchedulingPolicy ¶
type SchedulingPolicy string
SchedulingPolicy controls how the server picks the next stage queue on a backend.
const ( // SchedulingPolicyFCFS is first-come-first-served stage scheduling. SchedulingPolicyFCFS SchedulingPolicy = "fcfs" // SchedulingPolicyPriority picks the stage with the highest-priority head request. SchedulingPolicyPriority SchedulingPolicy = "priority" )
type StreamEvent ¶
type StreamEvent interface {
// Type returns the type of stream event
Type() StreamEventType
}
StreamEvent represents a single event in the streaming response
type StreamEventType ¶
type StreamEventType string
StreamEventType represents the type of stream event
const ( StreamEventTypeContent StreamEventType = "content" // Text content chunk StreamEventTypeToolCall StreamEventType = "toolcall" // Tool call notification StreamEventTypeThinking StreamEventType = "thinking" // Thinking trace chunk )
type StreamWriter ¶
StreamWriter is an interface for writing streaming responses
type StructuredOutputOptions ¶
type StructuredOutputOptions struct {
Name string `json:"name"` // Required by OpenAI for json_schema response_format
Strict bool `json:"strict,omitempty"` // If true, response is guaranteed to conform to schema (default true when used)
Schema json.RawMessage `json:"schema"` // JSON Schema object. The schema must be valid when set
}
StructuredOutputOptions requests the model to return content conforming to a JSON Schema.
type ThinkingEvent ¶
type ThinkingEvent struct {
Content string
}
ThinkingEvent represents a thinking trace chunk in the stream
func (*ThinkingEvent) Type ¶
func (e *ThinkingEvent) Type() StreamEventType
type ToolCallEvent ¶
ToolCallEvent represents a tool call notification in the stream
func (*ToolCallEvent) Type ¶
func (e *ToolCallEvent) Type() StreamEventType
type ToolCallWithID ¶
type ToolCallWithID struct {
ID string `json:"id"` // Unique identifier for this tool call
McpCallToolParams mcp.CallToolParams
}
ToolCallWithID represents a tool invocation request from the model. It embeds mcp.CallToolParams for MCP compatibility, and adds an ID for tracking in the agent loop so we can match results back to calls.