Documentation
¶
Overview ¶
Package model provides model integration for Orla Agent Mode (RFC 4). It defines the Provider interface and its implementations, and includes testing support: MockProvider, a mock Provider implementation, and MockLLMServer, an HTTP server that speaks the OpenAI-compatible chat API.
Index ¶
- func ParseModelIdentifier(modelID string) (provider, modelName string, err error)
- func RegisterProviderFactory(providerName string, factory ProviderFactory)
- type ContentEvent
- type InferenceOptions
- type Message
- type MessageRole
- type MockLLMServer
- type MockLLMServerBuilder
- func (b *MockLLMServerBuilder) ReturnContent(content string) *MockLLMServerBuilder
- func (b *MockLLMServerBuilder) ReturnNoChoices() *MockLLMServerBuilder
- func (b *MockLLMServerBuilder) ReturnStreamChunks(chunks []string) *MockLLMServerBuilder
- func (b *MockLLMServerBuilder) ReturnStreamWithToolCalls(content string, toolCalls ...mockLLMToolCall) *MockLLMServerBuilder
- func (b *MockLLMServerBuilder) ReturnToolCall(name, argsJSON string) *MockLLMServerBuilder
- func (b *MockLLMServerBuilder) ReturnToolCallWithID(id, name, argsJSON string) *MockLLMServerBuilder
- func (b *MockLLMServerBuilder) Start() *MockLLMServer
- type MockProvider
- func (m *MockProvider) CallCount() int
- func (m *MockProvider) Chat(ctx context.Context, messages []Message, tools []*mcp.Tool, ...) (*Response, <-chan StreamEvent, error)
- func (m *MockProvider) EnsureReady(ctx context.Context) error
- func (m *MockProvider) LastInferenceOptions() *InferenceOptions
- func (m *MockProvider) Name() string
- func (m *MockProvider) ReceivedMessages() [][]Message
- type MockProviderBuilder
- func (b *MockProviderBuilder) Build() *MockProvider
- func (b *MockProviderBuilder) WithChatError(err error) *MockProviderBuilder
- func (b *MockProviderBuilder) WithChatFunc(...) *MockProviderBuilder
- func (b *MockProviderBuilder) WithContent(content string) *MockProviderBuilder
- func (b *MockProviderBuilder) WithEnsureReadyError(err error) *MockProviderBuilder
- func (b *MockProviderBuilder) WithName(name string) *MockProviderBuilder
- func (b *MockProviderBuilder) WithStreamChunks(chunks []string) *MockProviderBuilder
- func (b *MockProviderBuilder) WithToolCall(name, argsJSON string) *MockProviderBuilder
- type OpenAIProvider
- type Provider
- type ProviderFactory
- type RequestSchedulingPolicy
- type Response
- type ResponseMetrics
- type SchedulingHints
- type SchedulingPolicy
- type StreamEvent
- type StreamEventType
- type StreamWriter
- type StructuredOutputOptions
- type ThinkingEvent
- type ToolCallEvent
- type ToolCallWithID
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func ParseModelIdentifier ¶
ParseModelIdentifier parses a model identifier string (e.g., "openai:llama3") and returns the provider name and model name.
func RegisterProviderFactory ¶
func RegisterProviderFactory(providerName string, factory ProviderFactory)
RegisterProviderFactory registers a provider factory by provider name (e.g. "openai").
Types ¶
type ContentEvent ¶
type ContentEvent struct {
Content string
}
ContentEvent represents a content chunk in the stream
func (*ContentEvent) Type ¶
func (e *ContentEvent) Type() StreamEventType
type InferenceOptions ¶
type InferenceOptions struct {
// Stream is whether to stream the response. The zero value (false) means no streaming.
Stream bool `json:"stream,omitempty"`
// MaxTokens is the maximum number of tokens to generate. A nil value means use the backend default.
MaxTokens *int `json:"max_tokens,omitempty"`
// Temperature is the temperature parameter for sampling. A nil value means use the backend default.
Temperature *float64 `json:"temperature,omitempty"`
// TopP is the nucleus sampling top_p parameter. A nil value means use the backend default.
TopP *float64 `json:"top_p,omitempty"`
// ResponseFormat is the structured output options. A nil value means no structured output.
ResponseFormat *StructuredOutputOptions `json:"response_format,omitempty"`
// ChatTemplateKwargs are extra kwargs passed to the chat template renderer
ChatTemplateKwargs map[string]any `json:"chat_template_kwargs,omitempty"`
// SchedulingPolicy selects stage-level backend queue scheduling behavior.
SchedulingPolicy SchedulingPolicy `json:"scheduling_policy,omitempty"`
// RequestSchedulingPolicy selects request-level ordering within a stage queue.
RequestSchedulingPolicy RequestSchedulingPolicy `json:"request_scheduling_policy,omitempty"`
// SchedulingHints are optional policy hints for backend queueing.
SchedulingHints *SchedulingHints `json:"scheduling_hints,omitempty"`
// ReasoningEffort controls thinking for reasoning-capable models ("high", "medium", "low", "none").
ReasoningEffort string `json:"reasoning_effort,omitempty"`
}
InferenceOptions holds per-request inference settings (agent profile knobs). JSON tags match the execute API so the server can embed this in ExecuteRequest.
func (InferenceOptions) GetSchedulingPolicy ¶
func (o InferenceOptions) GetSchedulingPolicy() SchedulingPolicy
GetSchedulingPolicy returns the configured scheduling policy or the FCFS default.
type Message ¶
type Message struct {
// Role of the message
Role MessageRole `json:"role"`
// Content of the message
Content string `json:"content"`
// ToolName is used by the Ollama API
ToolName string `json:"tool_name,omitempty"`
// ToolCallID is used by the OpenAI API and vLLM
ToolCallID string `json:"tool_call_id,omitempty"`
// ToolCalls carries the tool calls from an assistant message so they can be
// replayed in the conversation history on the next turn.
ToolCalls []ToolCallWithID `json:"tool_calls,omitempty"`
}
Message represents a chat message in a conversation. For role "tool" (i.e., tool call results), set both ToolCallID and ToolName when building messages. The OpenAI API uses ToolCallID and the Ollama API uses ToolName. The providers ignore the field they do not need.
type MessageRole ¶
type MessageRole string
const ( MessageRoleUser MessageRole = "user" MessageRoleAssistant MessageRole = "assistant" MessageRoleSystem MessageRole = "system" MessageRoleTool MessageRole = "tool" )
func (MessageRole) String ¶
func (r MessageRole) String() string
type MockLLMServer ¶
type MockLLMServer struct {
// contains filtered or unexported fields
}
MockLLMServer is an HTTP server that speaks the OpenAI-compatible chat API. Used for integration tests without real API keys or external backends.
func (*MockLLMServer) LastRequestBody ¶
func (s *MockLLMServer) LastRequestBody() []byte
LastRequestBody returns a copy of the raw request body from the most recent chat request. Tests can json.Unmarshal into openai.ChatCompletionRequest to assert on request fields.
type MockLLMServerBuilder ¶
type MockLLMServerBuilder struct {
// contains filtered or unexported fields
}
MockLLMServerBuilder builds a MockLLMServer with a fluent API.
func NewMockLLMServer ¶
func NewMockLLMServer() *MockLLMServerBuilder
NewMockLLMServer returns a new MockLLMServerBuilder.
func (*MockLLMServerBuilder) ReturnContent ¶
func (b *MockLLMServerBuilder) ReturnContent(content string) *MockLLMServerBuilder
ReturnContent sets the response content for non-streaming.
func (*MockLLMServerBuilder) ReturnNoChoices ¶
func (b *MockLLMServerBuilder) ReturnNoChoices() *MockLLMServerBuilder
ReturnNoChoices configures the server to return an empty choices array (triggers "no choices" error).
func (*MockLLMServerBuilder) ReturnStreamChunks ¶
func (b *MockLLMServerBuilder) ReturnStreamChunks(chunks []string) *MockLLMServerBuilder
ReturnStreamChunks sets streaming response chunks.
func (*MockLLMServerBuilder) ReturnStreamWithToolCalls ¶
func (b *MockLLMServerBuilder) ReturnStreamWithToolCalls(content string, toolCalls ...mockLLMToolCall) *MockLLMServerBuilder
ReturnStreamWithToolCalls configures streaming response with content followed by tool calls.
func (*MockLLMServerBuilder) ReturnToolCall ¶
func (b *MockLLMServerBuilder) ReturnToolCall(name, argsJSON string) *MockLLMServerBuilder
ReturnToolCall adds a tool call to the response. argsJSON is the JSON-encoded arguments.
func (*MockLLMServerBuilder) ReturnToolCallWithID ¶
func (b *MockLLMServerBuilder) ReturnToolCallWithID(id, name, argsJSON string) *MockLLMServerBuilder
ReturnToolCallWithID adds a tool call with a specific ID.
func (*MockLLMServerBuilder) Start ¶
func (b *MockLLMServerBuilder) Start() *MockLLMServer
Start starts the server and returns it. Call Close when done.
type MockProvider ¶
type MockProvider struct {
// contains filtered or unexported fields
}
MockProvider is a mock implementation of Provider for testing. Supports both builder-based configuration and function-based customization.
func (*MockProvider) CallCount ¶
func (m *MockProvider) CallCount() int
CallCount returns the number of times Chat has been called.
func (*MockProvider) Chat ¶
func (m *MockProvider) Chat(ctx context.Context, messages []Message, tools []*mcp.Tool, opts InferenceOptions) (*Response, <-chan StreamEvent, error)
Chat sends a chat request and returns the configured or custom response.
func (*MockProvider) EnsureReady ¶
func (m *MockProvider) EnsureReady(ctx context.Context) error
EnsureReady returns nil or the configured error.
func (*MockProvider) LastInferenceOptions ¶
func (m *MockProvider) LastInferenceOptions() *InferenceOptions
LastInferenceOptions returns the options from the most recent Chat call.
func (*MockProvider) ReceivedMessages ¶
func (m *MockProvider) ReceivedMessages() [][]Message
ReceivedMessages returns a copy of all message slices passed to Chat.
type MockProviderBuilder ¶
type MockProviderBuilder struct {
// contains filtered or unexported fields
}
MockProviderBuilder builds a MockProvider with a fluent API.
func NewMockProvider ¶
func NewMockProvider() *MockProviderBuilder
NewMockProvider returns a new MockProviderBuilder.
func (*MockProviderBuilder) Build ¶
func (b *MockProviderBuilder) Build() *MockProvider
Build returns the configured MockProvider.
func (*MockProviderBuilder) WithChatError ¶
func (b *MockProviderBuilder) WithChatError(err error) *MockProviderBuilder
WithChatError configures the provider to return an error from Chat.
func (*MockProviderBuilder) WithChatFunc ¶
func (b *MockProviderBuilder) WithChatFunc(fn func(ctx context.Context, messages []Message, tools []*mcp.Tool, opts InferenceOptions) (*Response, <-chan StreamEvent, error)) *MockProviderBuilder
WithChatFunc sets a custom Chat implementation (overrides builder-configured response).
func (*MockProviderBuilder) WithContent ¶
func (b *MockProviderBuilder) WithContent(content string) *MockProviderBuilder
WithContent sets the response content (non-streaming).
func (*MockProviderBuilder) WithEnsureReadyError ¶
func (b *MockProviderBuilder) WithEnsureReadyError(err error) *MockProviderBuilder
WithEnsureReadyError configures the provider to return an error from EnsureReady.
func (*MockProviderBuilder) WithName ¶
func (b *MockProviderBuilder) WithName(name string) *MockProviderBuilder
WithName sets the provider name.
func (*MockProviderBuilder) WithStreamChunks ¶
func (b *MockProviderBuilder) WithStreamChunks(chunks []string) *MockProviderBuilder
WithStreamChunks configures streaming response with the given chunks.
func (*MockProviderBuilder) WithToolCall ¶
func (b *MockProviderBuilder) WithToolCall(name, argsJSON string) *MockProviderBuilder
WithToolCall adds a tool call to the response. argsJSON is the JSON-encoded arguments.
type OpenAIProvider ¶
type OpenAIProvider struct {
// contains filtered or unexported fields
}
OpenAIProvider implements the Provider interface for OpenAI-compatible APIs. This provider is intended to work with any server that implements the OpenAI Chat Completions API format such as LM Studio, vLLM, and Ollama. For Ollama, use endpoint http://host:11434/v1 [1]. [1] https://docs.ollama.com/api/openai-compatibility
func NewOpenAIProvider ¶
func NewOpenAIProvider(modelName string, llmBackend *core.LLMBackend) (*OpenAIProvider, error)
NewOpenAIProvider creates a new OpenAI-compatible provider. This works with any server that implements the OpenAI Chat Completions API format.
func (*OpenAIProvider) Chat ¶
func (p *OpenAIProvider) Chat(ctx context.Context, messages []Message, tools []*mcp.Tool, opts InferenceOptions) (*Response, <-chan StreamEvent, error)
Chat sends a chat request to the OpenAI-compatible API. This works with any server implementing the OpenAI Chat Completions API format.
func (*OpenAIProvider) EnsureReady ¶
func (p *OpenAIProvider) EnsureReady(ctx context.Context) error
EnsureReady is a no-op for the OpenAI-compatible provider.
type Provider ¶
type Provider interface {
// Name returns the provider name (e.g., "openai", "anthropic")
Name() string
// Chat sends a chat request to the model with the given inference options and returns the response.
Chat(ctx context.Context, messages []Message, tools []*mcp.Tool, opts InferenceOptions) (*Response, <-chan StreamEvent, error)
// EnsureReady ensures the model provider is ready (e.g., starts Ollama if needed)
// Returns an error if the provider cannot be made ready
EnsureReady(ctx context.Context) error
}
Provider is the interface that all model providers must implement
func NewProvider ¶
func NewProvider(cfg *config.OrlaConfig) (Provider, error)
NewProvider creates a new model provider based on the configuration
func NewProviderFromBackend ¶
func NewProviderFromBackend(backend *core.LLMBackend, modelID string) (Provider, error)
NewProviderFromBackend creates a new model provider from a backend and model identifier. This is the programmatic entry point used by the serving layer.
type ProviderFactory ¶
type ProviderFactory func(modelName string, backend *core.LLMBackend, cfg *config.OrlaConfig) (Provider, error)
ProviderFactory creates a provider for a parsed model name and backend context.
type RequestSchedulingPolicy ¶
type RequestSchedulingPolicy string
RequestSchedulingPolicy controls how requests within a single stage queue are ordered.
const ( // RequestSchedulingPolicyFIFO processes requests in arrival order (default). RequestSchedulingPolicyFIFO RequestSchedulingPolicy = "fifo" // RequestSchedulingPolicyPriority processes the highest-priority request first. RequestSchedulingPolicyPriority RequestSchedulingPolicy = "priority" )
type Response ¶
type Response struct {
Content string `json:"content"` // Text content from the model
Thinking string `json:"thinking"` // Thinking trace from the model (if supported)
ToolCalls []ToolCallWithID `json:"tool_calls"` // Tool calls requested by the model
Metrics *ResponseMetrics `json:"metrics"` // Response metrics
}
Response represents a model response
type ResponseMetrics ¶
type ResponseMetrics struct {
// TTFTMs is time to first token in milliseconds. Only set when task was executed with streaming.
TTFTMs int64 `json:"ttft_ms,omitempty"`
// TPOTMs is time per output token in milliseconds. Only set when task was executed with streaming.
TPOTMs int64 `json:"tpot_ms,omitempty"`
// PromptTokens is the number of tokens in the prompt (input). Reported by the backend.
PromptTokens int `json:"prompt_tokens,omitempty"`
// CompletionTokens is the number of tokens generated (output). Reported by the backend.
CompletionTokens int `json:"completion_tokens,omitempty"`
// QueueWaitMs is the time spent waiting in Orla's backend scheduler queue.
QueueWaitMs int64 `json:"queue_wait_ms,omitempty"`
// SchedulerDecisionMs is the time spent selecting the next request in the scheduler.
SchedulerDecisionMs int64 `json:"scheduler_decision_ms,omitempty"`
// DispatchMs is the request dispatch/setup time between scheduler dequeue and provider return.
DispatchMs int64 `json:"dispatch_ms,omitempty"`
// BackendLatencyMs is end-to-end backend latency for non-streaming calls.
BackendLatencyMs int64 `json:"backend_latency_ms,omitempty"`
}
type SchedulingHints ¶
type SchedulingHints struct {
// Priority is optional and used by priority-based scheduling policies.
Priority *int `json:"priority,omitempty"`
}
SchedulingHints are optional policy-specific hints attached to an inference request.
func (*SchedulingHints) GetPriority ¶
func (h *SchedulingHints) GetPriority() int
GetPriority returns the priority score or 0 when unset.
type SchedulingPolicy ¶
type SchedulingPolicy string
SchedulingPolicy controls how the server picks the next stage queue on a backend.
const ( // SchedulingPolicyFCFS is first-come-first-served stage scheduling. SchedulingPolicyFCFS SchedulingPolicy = "fcfs" // SchedulingPolicyPriority picks the stage with the highest-priority head request. SchedulingPolicyPriority SchedulingPolicy = "priority" )
type StreamEvent ¶
type StreamEvent interface {
// Type returns the type of stream event
Type() StreamEventType
}
StreamEvent represents a single event in the streaming response
type StreamEventType ¶
type StreamEventType string
StreamEventType represents the type of stream event
const ( StreamEventTypeContent StreamEventType = "content" // Text content chunk StreamEventTypeToolCall StreamEventType = "toolcall" // Tool call notification StreamEventTypeThinking StreamEventType = "thinking" // Thinking trace chunk )
type StreamWriter ¶
StreamWriter is an interface for writing streaming responses
type StructuredOutputOptions ¶
type StructuredOutputOptions struct {
Name string `json:"name"` // Required by OpenAI for json_schema response_format
Strict bool `json:"strict,omitempty"` // If true, response is guaranteed to conform to schema (default true when used)
Schema json.RawMessage `json:"schema"` // JSON Schema object. The schema must be valid when set
}
StructuredOutputOptions requests the model to return content conforming to a JSON Schema.
type ThinkingEvent ¶
type ThinkingEvent struct {
Content string
}
ThinkingEvent represents a thinking trace chunk in the stream
func (*ThinkingEvent) Type ¶
func (e *ThinkingEvent) Type() StreamEventType
type ToolCallEvent ¶
ToolCallEvent represents a tool call notification in the stream
func (*ToolCallEvent) Type ¶
func (e *ToolCallEvent) Type() StreamEventType
type ToolCallWithID ¶
type ToolCallWithID struct {
ID string `json:"id"` // Unique identifier for this tool call
McpCallToolParams mcp.CallToolParams
}
ToolCallWithID represents a tool invocation request from the model. It embeds mcp.CallToolParams for MCP compatibility, and adds an ID for tracking in the agent loop so we can match results back to calls.