orla

package

v1.2.15 Latest Latest Go to latest Published: Apr 18, 2026 License: MIT Imports: 13 Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/harvard-cns/orla

Links

Open Source Insights

Documentation ¶

Overview ¶

Package orla provides a basic public Go client library for Orla server.

This client is frozen at its current feature set. New features (cost-based routing, accuracy, fallback policies, budget enforcement) are added to the HTTP API and the pyorla Python client only. Go consumers needing new features should use the HTTP API directly.

Index ¶

Constants
func ApplyStageMappingOutput(stages []*Stage, output *StageMappingOutput) error
func LogDeferredError(fn func() error)
type CacheEvent
type CacheHints
type ExecuteRequest
- func (r *ExecuteRequest) GetStageID() string
type ExecuteResponse
type ExecutionMode
type ExplicitStageMapping
- func (m *ExplicitStageMapping) Map(input *StageMappingInput) (*StageMappingOutput, error)
type InferenceResponse
type InferenceResponseMetrics
type LLMBackend
- func NewOllamaBackend(modelID string, endpoint string) *LLMBackend
- func NewSGLangBackend(modelID string, endpoint string) *LLMBackend
- func NewVLLMBackend(modelID string, endpoint string) *LLMBackend
type MemoryPolicy
- func NewDefaultMemoryPolicy(opts ...MemoryPolicyOption) MemoryPolicy
- func NewFlushAtBoundaryPolicy() MemoryPolicy
- func NewPreserveOnSmallIncrementPolicy(thresholdTokens int) MemoryPolicy
type MemoryPolicyOption
- func WithPreserveThreshold(tokens int) MemoryPolicyOption
type Message
type OneBitPredictor
- func NewOneBitPredictor(client *OrlaClient, backend *LLMBackend) *OneBitPredictor
- func (p *OneBitPredictor) Predict(ctx context.Context, prompt string) (bool, error)
type OneBitStageMapper
- func NewOneBitStageMapper(client *OrlaClient, backend *LLMBackend, stageOne *Stage, stageTwo *Stage) *OneBitStageMapper
- func (m *OneBitStageMapper) MapStage(ctx context.Context, prompt string) (*Stage, error)
type OrlaClient
- func NewOrlaClient(baseURL string) *OrlaClient
- func (c *OrlaClient) Execute(ctx context.Context, req *ExecuteRequest) (*InferenceResponse, error)
- func (c *OrlaClient) ExecuteStream(ctx context.Context, req *ExecuteRequest) (<-chan StreamEvent, error)
- func (c *OrlaClient) Health(ctx context.Context) error
- func (c *OrlaClient) RegisterBackend(ctx context.Context, req *RegisterBackendRequest) error
- func (c *OrlaClient) WorkflowComplete(ctx context.Context, workflowID string, backends []string) error
type PromptScorer
type RawToolCall
- func (r RawToolCall) MarshalJSON() ([]byte, error)
- func (r *RawToolCall) UnmarshalJSON(data []byte) error
type RegisterBackendRequest
- func (r *RegisterBackendRequest) SetMaxConcurrency(n int)
- func (r *RegisterBackendRequest) SetQueueCapacity(n int)
type RegisterBackendResponse
type SchedulingHints
type Stage
- func NewStage(name string, backend *LLMBackend) *Stage
- func (s *Stage) AddTool(t *Tool) error
- func (s *Stage) ConsumeStream(ctx context.Context, stream <-chan StreamEvent, handler StreamHandler) (*InferenceResponse, error)
- func (s *Stage) Execute(ctx context.Context, prompt string) (*InferenceResponse, error)
- func (s *Stage) ExecuteStream(ctx context.Context, prompt string) (<-chan StreamEvent, error)
- func (s *Stage) ExecuteStreamWithMessages(ctx context.Context, messages []Message) (<-chan StreamEvent, error)
- func (s *Stage) ExecuteWithMessages(ctx context.Context, messages []Message) (*InferenceResponse, error)
- func (s *Stage) RunToolCall(ctx context.Context, toolCall *ToolCall) (*ToolResult, error)
- func (s *Stage) RunToolCallsInResponse(ctx context.Context, response *InferenceResponse) ([]*Message, error)
- func (s *Stage) RunToolCallsInResponseAndGetToolResults(ctx context.Context, response *InferenceResponse) ([]*ToolResult, error)
- func (s *Stage) SetCacheHints(hints *CacheHints)
- func (s *Stage) SetCachePolicy(policy string)
- func (s *Stage) SetChatTemplateKwargs(kwargs map[string]any)
- func (s *Stage) SetExecutionMode(mode ExecutionMode)
- func (s *Stage) SetMaxTokens(n int)
- func (s *Stage) SetMaxTurns(n int)
- func (s *Stage) SetMessagesBuilder(builder StageMessagesBuilder)
- func (s *Stage) SetPromptBuilder(builder StagePromptBuilder)
- func (s *Stage) SetReasoningEffort(effort string)
- func (s *Stage) SetRequestSchedulingPolicy(policy string)
- func (s *Stage) SetResponseFormat(r *StructuredOutputRequest)
- func (s *Stage) SetSchedulingHints(hints *SchedulingHints)
- func (s *Stage) SetSchedulingPolicy(policy string)
- func (s *Stage) SetStream(enabled bool)
- func (s *Stage) SetTemperature(f float64)
- func (s *Stage) SetTopP(f float64)
type StageAssignment
type StageMapper
type StageMapping
type StageMappingInput
type StageMappingOutput
type StageMessagesBuilder
type StagePromptBuilder
type StageResult
type StreamEvent
type StreamHandler
type StructuredOutputRequest
- func NewStructuredOutputRequest(name string, schema any) *StructuredOutputRequest
type ThresholdStageMapper
- func NewThresholdStageMapper(threshold float64, lowStage, highStage *Stage, scoreFn PromptScorer) *ThresholdStageMapper
- func (m *ThresholdStageMapper) MapStage(_ context.Context, prompt string) (*Stage, error)
type Tool
- func NewTool(name, description string, inputSchema, outputSchema ToolSchema, run ToolRunner) (*Tool, error)
type ToolCall
- func NewToolCallFromRawToolCall(rawToolCall RawToolCall) (*ToolCall, error)
type ToolCallDelta
type ToolResult
- func (r ToolResult) ToMessage() (*Message, error)
type ToolRunner
- func ToolRunnerFromSchema(fn func(ctx context.Context, input ToolSchema) (ToolSchema, error)) ToolRunner
type ToolSchema
type Workflow
- func NewWorkflow(client *OrlaClient) *Workflow
- func (w *Workflow) AddDependency(stageID, dependsOnStageID string) error
- func (w *Workflow) AddStage(s *Stage) error
- func (w *Workflow) Execute(ctx context.Context) (map[string]*StageResult, error)
- func (w *Workflow) MemoryPolicyOrDefault() MemoryPolicy
- func (w *Workflow) SetMemoryPolicy(policy MemoryPolicy)
- func (w *Workflow) Stages() map[string]*Stage

Constants ¶

View Source

const (
	SchedulingPolicyFCFS     = "fcfs"
	SchedulingPolicyPriority = "priority"
)

View Source

const (
	CachePolicyPreserve = "preserve"
	CachePolicyFlush    = "flush"
	CachePolicyAuto     = "auto"
)

CachePolicy constants for stage-level cache policy overrides.

Variables ¶

This section is empty.

Functions ¶

func ApplyStageMappingOutput ¶

func ApplyStageMappingOutput(stages []*Stage, output *StageMappingOutput) error

ApplyStageMappingOutput applies mapping output to stages, overwriting their backend and inference parameters with the assigned values.

func LogDeferredError ¶

func LogDeferredError(fn func() error)

LogDeferredError takes a function that returns an error, calls it, and logs the error if it is not nil

Types ¶

type CacheEvent ¶

type CacheEvent struct {
	PrevStageBackend string
	PrevStageModel   string
	NextStageBackend string
	NextStageModel   string
	DeltaTokens      int
	TotalTokens      int
	TransitionType   string // "stage", "workflow_complete"
}

CacheEvent describes a stage transition for the MemoryPolicy to evaluate.

type CacheHints ¶

type CacheHints struct {
	PreserveThresholdTokens *int  `json:"preserve_threshold_tokens,omitempty"`
	FlushOnComplete         *bool `json:"flush_on_complete,omitempty"`
}

CacheHints are optional per-stage parameters that influence the Memory Manager's cache decisions. They follow the same pattern as SchedulingHints.

type ExecuteRequest ¶

type ExecuteRequest struct {
	Backend string `json:"backend"`
	// StageID is the globally unique stage ID for this request.
	StageID string `json:"stage_id,omitempty"`
	// Prompt is the prompt to execute.
	Prompt string `json:"prompt,omitempty"`
	// Messages are the messages to execute.
	Messages []Message `json:"messages,omitempty"`
	// Tools are the tools attached to this request.
	Tools []*mcp.Tool `json:"tools,omitempty"`
	// MaxTokens is the maximum number of tokens to generate. A nil value means use the backend default.
	MaxTokens *int `json:"max_tokens,omitempty"`
	// Stream is whether to stream the response. A nil value means no streaming.
	Stream bool `json:"stream,omitempty"`
	// Temperature is the temperature parameter for sampling. A nil value means use the backend default.
	Temperature *float64 `json:"temperature,omitempty"`
	// TopP is the nucleus sampling top_p parameter. A nil value means use the backend default.
	TopP *float64 `json:"top_p,omitempty"`
	// ResponseFormat is the structured output options. A nil value means no structured output.
	ResponseFormat *StructuredOutputRequest `json:"response_format,omitempty"`
	// ChatTemplateKwargs are extra kwargs passed to the chat template renderer
	ChatTemplateKwargs map[string]any `json:"chat_template_kwargs,omitempty"`
	// SchedulingPolicy selects stage-level server-side queue scheduling policy.
	SchedulingPolicy string `json:"scheduling_policy,omitempty"`
	// RequestSchedulingPolicy selects request-level ordering within a stage queue.
	RequestSchedulingPolicy string `json:"request_scheduling_policy,omitempty"`
	// SchedulingHints are optional policy hints attached to the request.
	SchedulingHints *SchedulingHints `json:"scheduling_hints,omitempty"`
	// WorkflowID groups requests from the same workflow execution for cache management.
	WorkflowID string `json:"workflow_id,omitempty"`
	// CachePolicy is the stage-level cache policy override ("preserve", "flush", or empty for auto).
	CachePolicy string `json:"cache_policy,omitempty"`
	// CacheHints are optional per-stage cache parameters.
	CacheHints *CacheHints `json:"cache_hints,omitempty"`
	// ReasoningEffort controls thinking for reasoning-capable models ("high", "medium", "low", "none").
	ReasoningEffort string `json:"reasoning_effort,omitempty"`
}

ExecuteRequest represents a request to execute inference on a named backend.

func (*ExecuteRequest) GetStageID ¶

func (r *ExecuteRequest) GetStageID() string

GetStageID returns the stage ID for this request.

type ExecuteResponse ¶

type ExecuteResponse struct {
	// Success is whether the execute call was successful.
	Success bool `json:"success"`
	// Response is the response from the execute call.
	Response *InferenceResponse `json:"response,omitempty"`
	// Error is the error from the execute call, if any.
	Error string `json:"error,omitempty"`
}

ExecuteResponse represents the response from an execute call.

type ExecutionMode ¶

type ExecutionMode string

ExecutionMode controls how a stage executes within a workflow DAG.

const (
	ExecutionModeSingleShot ExecutionMode = "single_shot"
	ExecutionModeAgentLoop  ExecutionMode = "agent_loop"
)

type ExplicitStageMapping ¶

type ExplicitStageMapping struct{}

ExplicitStageMapping validates that every stage already has a backend assigned. This is the default mapping strategy when the user sets backends directly on stages.

func (*ExplicitStageMapping) Map ¶

func (m *ExplicitStageMapping) Map(input *StageMappingInput) (*StageMappingOutput, error)

Map validates all stages have backends and returns their current assignments.

type InferenceResponse ¶

type InferenceResponse struct {
	Content   string                    `json:"content"`
	Thinking  string                    `json:"thinking,omitempty"`
	ToolCalls []RawToolCall             `json:"tool_calls,omitempty"`
	Metrics   *InferenceResponseMetrics `json:"metrics,omitempty"`
}

InferenceResponse represents the response from inference.

type InferenceResponseMetrics ¶

type InferenceResponseMetrics struct {
	TTFTMs              *int64   `json:"ttft_ms,omitempty"`
	TPOTMs              *int64   `json:"tpot_ms,omitempty"`
	PromptTokens        int      `json:"prompt_tokens,omitempty"`
	CompletionTokens    int      `json:"completion_tokens,omitempty"`
	QueueWaitMs         int64    `json:"queue_wait_ms,omitempty"`
	SchedulerDecisionMs int64    `json:"scheduler_decision_ms,omitempty"`
	DispatchMs          int64    `json:"dispatch_ms,omitempty"`
	BackendLatencyMs    *int64   `json:"backend_latency_ms,omitempty"`
	EstimatedCostUSD    *float64 `json:"estimated_cost_usd,omitempty"`
}

InferenceResponseMetrics holds timing and token usage metrics from execution. Pointer fields are mode-specific: nil means "not applicable for this request type".

type LLMBackend ¶

type LLMBackend = RegisterBackendRequest

func NewOllamaBackend ¶

func NewOllamaBackend(modelID string, endpoint string) *LLMBackend

NewOllamaBackend creates a backend that talks to Ollama's OpenAI-compatible API (/v1/chat/completions). The endpoint should be the base Ollama URL (e.g. "http://ollama:11434"); "/v1" is appended automatically.

func NewSGLangBackend ¶

func NewSGLangBackend(modelID string, endpoint string) *LLMBackend

func NewVLLMBackend ¶

func NewVLLMBackend(modelID string, endpoint string) *LLMBackend

type MemoryPolicy ¶

type MemoryPolicy interface {
	Decide(ctx context.Context, event CacheEvent) string // returns CachePolicyPreserve, CachePolicyFlush, or CachePolicyAuto
}

MemoryPolicy determines cache actions at workflow level. Developers can implement custom policies or use the built-in ones shipped with Orla.

func NewDefaultMemoryPolicy ¶

func NewDefaultMemoryPolicy(opts ...MemoryPolicyOption) MemoryPolicy

NewDefaultMemoryPolicy creates a MemoryPolicy that composes the three paper policies: preserve on small increment, then flush at boundary. The flush-under-pressure policy is handled server-side by the Memory Manager.

func NewFlushAtBoundaryPolicy ¶

func NewFlushAtBoundaryPolicy() MemoryPolicy

NewFlushAtBoundaryPolicy creates a MemoryPolicy that flushes cache at workflow boundaries and when the backend/model changes between stages.

func NewPreserveOnSmallIncrementPolicy ¶

func NewPreserveOnSmallIncrementPolicy(thresholdTokens int) MemoryPolicy

NewPreserveOnSmallIncrementPolicy creates a MemoryPolicy that preserves KV cache when the new stage adds fewer than thresholdTokens to the context.

type MemoryPolicyOption ¶

type MemoryPolicyOption func(*defaultMemoryPolicyConfig)

MemoryPolicyOption configures a DefaultMemoryPolicy.

func WithPreserveThreshold ¶

func WithPreserveThreshold(tokens int) MemoryPolicyOption

WithPreserveThreshold overrides the default token threshold for the preserve-on-small-increment policy.

type Message ¶

type Message struct {
	Role       string        `json:"role"`
	Content    string        `json:"content"`
	ToolCallID string        `json:"tool_call_id,omitempty"`
	ToolName   string        `json:"tool_name,omitempty"`
	ToolCalls  []RawToolCall `json:"tool_calls,omitempty"`
}

Message represents a chat message.

type OneBitPredictor ¶

type OneBitPredictor struct {
	Stage *Stage
}

OneBitPredictor is a predictor that returns a single bit of information.

func NewOneBitPredictor ¶

func NewOneBitPredictor(client *OrlaClient, backend *LLMBackend) *OneBitPredictor

NewOneBitPredictor returns a new OneBitPredictor. The predictor uses temperature 0 for deterministic classification.

func (*OneBitPredictor) Predict ¶

func (p *OneBitPredictor) Predict(ctx context.Context, prompt string) (bool, error)

Predict predicts a single bit of information. prompt is the text sent to the model.

type OneBitStageMapper ¶

type OneBitStageMapper struct {
	OneBitPredictor *OneBitPredictor
	StageOne        *Stage
	StageTwo        *Stage
	Prompt          string
}

OneBitStageMapper uses a one-bit predictor to map prompts to one of two stages.

func NewOneBitStageMapper ¶

func NewOneBitStageMapper(client *OrlaClient, backend *LLMBackend, stageOne *Stage, stageTwo *Stage) *OneBitStageMapper

NewOneBitStageMapper returns a new one-bit stage mapper.

func (*OneBitStageMapper) MapStage ¶

func (m *OneBitStageMapper) MapStage(ctx context.Context, prompt string) (*Stage, error)

MapStage maps the stage based on the prompt.

type OrlaClient ¶

type OrlaClient struct {
	// contains filtered or unexported fields
}

OrlaClient is the public API client for the Orla daemon

func NewOrlaClient ¶

func NewOrlaClient(baseURL string) *OrlaClient

NewOrlaClient creates a new daemon API client.

func (*OrlaClient) Execute ¶

func (c *OrlaClient) Execute(ctx context.Context, req *ExecuteRequest) (*InferenceResponse, error)

Execute runs inference on the named backend via the daemon.

func (*OrlaClient) ExecuteStream ¶

func (c *OrlaClient) ExecuteStream(ctx context.Context, req *ExecuteRequest) (<-chan StreamEvent, error)

ExecuteStream runs inference with streaming. The returned channel receives content, thinking, and tool_call deltas, then a final "done" event with the full Response. Caller must consume the channel until closed.

func (*OrlaClient) Health ¶

func (c *OrlaClient) Health(ctx context.Context) error

Health checks the health of the daemon

func (*OrlaClient) RegisterBackend ¶

func (c *OrlaClient) RegisterBackend(ctx context.Context, req *RegisterBackendRequest) error

RegisterBackend registers an LLM backend with the daemon. Call this before using the backend in Execute. Pass the same req (or the *LLMBackend) to NewWorkflow/NewStage after a successful registration.

func (*OrlaClient) WorkflowComplete ¶

func (c *OrlaClient) WorkflowComplete(ctx context.Context, workflowID string, backends []string) error

WorkflowComplete notifies the server that a workflow has finished so the Memory Manager can flush caches and clean up tracking state.

type PromptScorer ¶

type PromptScorer func(prompt string) float64

PromptScorer computes a routing score for a prompt.

type RawToolCall ¶

type RawToolCall []byte

RawToolCall is a raw tool call from the inference response.

func (RawToolCall) MarshalJSON ¶

func (r RawToolCall) MarshalJSON() ([]byte, error)

MarshalJSON returns the raw bytes as-is since they are already valid JSON. Without this, []byte would be base64-encoded by encoding/json.

func (*RawToolCall) UnmarshalJSON ¶

func (r *RawToolCall) UnmarshalJSON(data []byte) error

UnmarshalJSON stores the raw JSON bytes of the value (object or otherwise) so that tool_calls from the API response decode correctly.

type RegisterBackendRequest ¶

type RegisterBackendRequest struct {
	Name           string `json:"name"`                      // backend name (used as Backend in execute requests)
	Endpoint       string `json:"endpoint"`                  // e.g. "http://localhost:8000/v1"
	Type           string `json:"type"`                      // "openai" or "sglang"
	ModelID        string `json:"model_id"`                  // e.g. "openai:Qwen/Qwen3-4B-Instruct-2507"
	APIKeyEnvVar   string `json:"api_key_env_var,omitempty"` // optional env var for API key (openai-type)
	MaxConcurrency int    `json:"max_concurrency,omitempty"` // max concurrent requests dispatched to this backend (default 1)
	QueueCapacity  int    `json:"queue_capacity,omitempty"`  // max queued requests for this backend; 0 = default (4096)
}

RegisterBackendRequest is the request body for registering an LLM backend.

func (*RegisterBackendRequest) SetMaxConcurrency ¶

func (r *RegisterBackendRequest) SetMaxConcurrency(n int)

SetMaxConcurrency sets the maximum number of concurrent inference requests dispatched to this backend. Backends that support continuous batching (e.g. vLLM, SGLang) can process multiple requests simultaneously for better throughput. A value of 0 or 1 means serial dispatch (default).

func (*RegisterBackendRequest) SetQueueCapacity ¶

func (r *RegisterBackendRequest) SetQueueCapacity(n int)

SetQueueCapacity sets the maximum number of requests that may be queued for this backend. When full, new requests get an error (backpressure). Zero means use the server default (4096).

type RegisterBackendResponse ¶

type RegisterBackendResponse struct {
	Success bool   `json:"success"`
	Error   string `json:"error,omitempty"`
}

RegisterBackendResponse is the response from register backend.

type SchedulingHints ¶

type SchedulingHints struct {
	Priority        *int `json:"priority,omitempty"`
	RequestPriority *int `json:"request_priority,omitempty"`
}

SchedulingHints are optional server scheduling hints attached to execute requests.

type Stage ¶

type Stage struct {
	// ID is a globally unique identifier, auto-generated if not set.
	ID   string
	Name string

	// Client is required for execution methods (Execute, ExecuteStream, etc.).
	// Set automatically when the stage is added to a Workflow.
	Client  *OrlaClient
	Backend *LLMBackend

	Tools         map[string]*Tool
	ExecutionMode ExecutionMode
	MaxTurns      int // max turns for agent-loop mode; 0 means default (100)

	Prompt          string
	PromptBuilder   StagePromptBuilder
	MessagesBuilder StageMessagesBuilder

	MaxTokens          *int
	Temperature        *float64
	TopP               *float64
	ResponseFormat     *StructuredOutputRequest
	ChatTemplateKwargs map[string]any
	ReasoningEffort    string

	StageSchedulingPolicy   string
	RequestSchedulingPolicy string
	SchedulingHints         *SchedulingHints

	CachePolicy string      // "preserve", "flush", or "" (auto/default)
	CacheHnts   *CacheHints // per-stage cache hint overrides

	Stream bool // when true, workflow uses streaming execution to capture TTFT/TPOT
	// contains filtered or unexported fields
}

Stage is the primary execution unit in Orla. It holds backend, inference options, tools, execution mode, and scheduling configuration. Each Stage has a globally unique ID and can execute LLM inference calls directly.

func NewStage ¶

func NewStage(name string, backend *LLMBackend) *Stage

NewStage returns a new Stage with a globally unique ID and the given backend.

func (*Stage) AddTool ¶

func (s *Stage) AddTool(t *Tool) error

AddTool adds a tool to this stage. Returns an error if t is nil.

func (*Stage) ConsumeStream ¶

func (s *Stage) ConsumeStream(ctx context.Context, stream <-chan StreamEvent, handler StreamHandler) (*InferenceResponse, error)

ConsumeStream reads a stream until "done", accumulates content/thinking/metrics, and returns the result.

func (*Stage) Execute ¶

func (s *Stage) Execute(ctx context.Context, prompt string) (*InferenceResponse, error)

Execute runs a single non-streaming inference with the given prompt.

func (*Stage) ExecuteStream ¶

func (s *Stage) ExecuteStream(ctx context.Context, prompt string) (<-chan StreamEvent, error)

ExecuteStream runs inference with streaming; returns a channel of events.

func (*Stage) ExecuteStreamWithMessages ¶

func (s *Stage) ExecuteStreamWithMessages(ctx context.Context, messages []Message) (<-chan StreamEvent, error)

ExecuteStreamWithMessages runs streaming inference with the given message list and any attached tools.

func (*Stage) ExecuteWithMessages ¶

func (s *Stage) ExecuteWithMessages(ctx context.Context, messages []Message) (*InferenceResponse, error)

ExecuteWithMessages runs a single non-streaming inference with the given message list and any attached tools.

func (*Stage) RunToolCall ¶

func (s *Stage) RunToolCall(ctx context.Context, toolCall *ToolCall) (*ToolResult, error)

RunToolCall runs a single tool call against this stage's tools.

func (*Stage) RunToolCallsInResponse ¶

func (s *Stage) RunToolCallsInResponse(ctx context.Context, response *InferenceResponse) ([]*Message, error)

RunToolCallsInResponse runs the tool calls in the response and returns tool result messages.

func (*Stage) RunToolCallsInResponseAndGetToolResults ¶

func (s *Stage) RunToolCallsInResponseAndGetToolResults(ctx context.Context, response *InferenceResponse) ([]*ToolResult, error)

RunToolCallsInResponseAndGetToolResults parses tool calls from the response, runs each, and returns results.

func (*Stage) SetCacheHints ¶

func (s *Stage) SetCacheHints(hints *CacheHints)

func (*Stage) SetCachePolicy ¶

func (s *Stage) SetCachePolicy(policy string)

func (*Stage) SetChatTemplateKwargs ¶

func (s *Stage) SetChatTemplateKwargs(kwargs map[string]any)

func (*Stage) SetExecutionMode ¶

func (s *Stage) SetExecutionMode(mode ExecutionMode)

func (*Stage) SetMaxTokens ¶

func (s *Stage) SetMaxTokens(n int)

func (*Stage) SetMaxTurns ¶

func (s *Stage) SetMaxTurns(n int)

func (*Stage) SetMessagesBuilder ¶

func (s *Stage) SetMessagesBuilder(builder StageMessagesBuilder)

func (*Stage) SetPromptBuilder ¶

func (s *Stage) SetPromptBuilder(builder StagePromptBuilder)

func (*Stage) SetReasoningEffort ¶

func (s *Stage) SetReasoningEffort(effort string)

func (*Stage) SetRequestSchedulingPolicy ¶

func (s *Stage) SetRequestSchedulingPolicy(policy string)

func (*Stage) SetResponseFormat ¶

func (s *Stage) SetResponseFormat(r *StructuredOutputRequest)

func (*Stage) SetSchedulingHints ¶

func (s *Stage) SetSchedulingHints(hints *SchedulingHints)

func (*Stage) SetSchedulingPolicy ¶

func (s *Stage) SetSchedulingPolicy(policy string)

func (*Stage) SetStream ¶

func (s *Stage) SetStream(enabled bool)

func (*Stage) SetTemperature ¶

func (s *Stage) SetTemperature(f float64)

func (*Stage) SetTopP ¶

func (s *Stage) SetTopP(f float64)

type StageAssignment ¶

type StageAssignment struct {
	Backend        *LLMBackend
	MaxTokens      *int
	Temperature    *float64
	TopP           *float64
	ResponseFormat *StructuredOutputRequest
}

StageAssignment describes the backend and inference parameters assigned to a stage.

type StageMapper ¶

type StageMapper interface {
	MapStage(ctx context.Context, prompt string) (*Stage, error)
}

StageMapper maps a prompt to an execution stage.

type StageMapping ¶

type StageMapping interface {
	Map(input *StageMappingInput) (*StageMappingOutput, error)
}

StageMapping takes a set of stages and available backends, and assigns each stage to a specific backend with inference parameters. This is the planning step before workflow execution (Section 4.1 of the Orla paper).

type StageMappingInput ¶

type StageMappingInput struct {
	Stages   []*Stage
	Backends []*LLMBackend
}

StageMappingInput is the input to a StageMapping: all stages that need assignment and the available infrastructure (backends).

type StageMappingOutput ¶

type StageMappingOutput struct {
	Assignments map[string]*StageAssignment // stageID -> assignment
}

StageMappingOutput is the result of stage mapping: per-stage assignments.

type StageMessagesBuilder ¶

type StageMessagesBuilder func(results map[string]*StageResult) ([]Message, error)

StageMessagesBuilder builds messages from upstream dependency results for DAG execution.

type StagePromptBuilder ¶

type StagePromptBuilder func(results map[string]*StageResult) (string, error)

StagePromptBuilder builds a prompt from upstream dependency results for DAG execution.

type StageResult ¶

type StageResult struct {
	Response *InferenceResponse
	Messages []Message // full conversation history (populated for agent-loop stages)
}

StageResult wraps the output of a stage execution.

type StreamEvent ¶

type StreamEvent struct {
	Type     string             // "content", "thinking", "tool_call", or "done"
	Content  string             // content delta (Type == "content")
	Thinking string             // thinking delta (Type == "thinking")
	ToolCall *ToolCallDelta     // tool call (Type == "tool_call")
	Response *InferenceResponse // final response (Type == "done")
}

StreamEvent is a single event from ExecuteStream. Exactly one of Content, Thinking, ToolCall, or Response is set, depending on Type.

type StreamHandler ¶

type StreamHandler func(event StreamEvent) error

StreamHandler is an optional callback invoked for each stream event. ConsumeStream always accumulates and returns the full InferenceResponse; the handler is for side effects only.

type StructuredOutputRequest ¶

type StructuredOutputRequest struct {
	// Name is the name of the structured output. Required for json_schema response_format.
	Name string `json:"name"`
	// Strict is whether the response is guaranteed to conform to the schema.
	Strict bool `json:"strict,omitempty"`
	// Schema is the JSON Schema object (e.g. map[string]any or struct). The schema must be valid when set.
	Schema any `json:"schema"`
}

StructuredOutputRequest requests the model to return content conforming to a JSON Schema.

func NewStructuredOutputRequest ¶

func NewStructuredOutputRequest(name string, schema any) *StructuredOutputRequest

NewStructuredOutputRequest returns a new StructuredOutputRequest.

type ThresholdStageMapper ¶

type ThresholdStageMapper struct {
	Threshold float64
	LowStage  *Stage
	HighStage *Stage
	ScoreFn   PromptScorer
}

ThresholdStageMapper routes prompts to one of two stages by comparing score to a threshold.

func NewThresholdStageMapper ¶

func NewThresholdStageMapper(threshold float64, lowStage, highStage *Stage, scoreFn PromptScorer) *ThresholdStageMapper

NewThresholdStageMapper creates a stage mapper that routes by score threshold.

func (*ThresholdStageMapper) MapStage ¶

func (m *ThresholdStageMapper) MapStage(_ context.Context, prompt string) (*Stage, error)

MapStage maps prompt to stage based on score threshold.

type Tool ¶

type Tool struct {
	Name         string
	Description  string
	InputSchema  ToolSchema
	OutputSchema ToolSchema
	Run          ToolRunner
}

Tool defines a single tool: name, description, schemas, and runner.

func NewTool ¶

func NewTool(name, description string, inputSchema, outputSchema ToolSchema, run ToolRunner) (*Tool, error)

NewTool returns a Tool. run must be non-nil.

type ToolCall ¶

type ToolCall struct {
	ID             string     `json:"id"`
	Name           string     `json:"name"`
	InputArguments ToolSchema `json:"input_arguments"`
}

ToolCall is one tool invocation from the agent.

func NewToolCallFromRawToolCall ¶

func NewToolCallFromRawToolCall(rawToolCall RawToolCall) (*ToolCall, error)

NewToolCallFromRawToolCall converts a raw tool call from an InferenceResponse to a ToolCall. data is an element of the array InferenceResponse.ToolCalls.

type ToolCallDelta ¶

type ToolCallDelta struct {
	Name      string         `json:"name"`
	Arguments map[string]any `json:"arguments"`
}

ToolCallDelta is a streaming tool call notification.

type ToolResult ¶

type ToolResult struct {
	ID           string     `json:"id"`
	Name         string     `json:"name"`
	OutputValues ToolSchema `json:"output_values"`
	Error        string     `json:"error,omitempty"`
	IsError      bool       `json:"is_error,omitempty"`
}

ToolResult is the result of running one tool call.

func (ToolResult) ToMessage ¶

func (r ToolResult) ToMessage() (*Message, error)

ToMessage returns a tool-result message to append to the conversation.

type ToolRunner ¶

type ToolRunner func(ctx context.Context, input ToolSchema) (*ToolResult, error)

ToolRunner runs a tool: input from the model, result back to the model. Return a ToolResult with OutputValues (and optionally Error/IsError for tool-level failures). ID and Name are filled in by the agent; the runner only sets OutputValues, Error, IsError. Returning a non-nil error is treated as IsError true with Error set to err.Error().

func ToolRunnerFromSchema ¶

func ToolRunnerFromSchema(fn func(ctx context.Context, input ToolSchema) (ToolSchema, error)) ToolRunner

ToolRunnerFromSchema wraps a simple (ToolSchema, error) function as a ToolRunner. Use this when you don't need to return tool-level Error/IsError; a returned Go error becomes result.IsError.

type ToolSchema ¶

type ToolSchema map[string]any

ToolSchema is a JSON-serializable object (e.g. for tool input/output).

type Workflow ¶

type Workflow struct {
	// contains filtered or unexported fields
}

Workflow is a DAG of Stages with dependency-aware scheduling. Independent stages execute concurrently; dependent stages wait for their upstream stages to complete. Use AddStage and AddDependency to build the DAG, then Execute to run it.

func NewWorkflow ¶

func NewWorkflow(client *OrlaClient) *Workflow

NewWorkflow creates an empty workflow bound to the given client.

func (*Workflow) AddDependency ¶

func (w *Workflow) AddDependency(stageID, dependsOnStageID string) error

AddDependency declares that stageID depends on dependsOnStageID (dependsOnStageID must finish before stageID starts).

func (*Workflow) AddStage ¶

func (w *Workflow) AddStage(s *Stage) error

AddStage registers a stage in the workflow's DAG. Sets stage.Client automatically.

func (*Workflow) Execute ¶

func (w *Workflow) Execute(ctx context.Context) (map[string]*StageResult, error)

Execute runs the workflow's stage DAG with dependency-aware scheduling. Independent stages execute concurrently; context is passed between stages via PromptBuilder/MessagesBuilder. Returns results keyed by stage ID.

func (*Workflow) MemoryPolicyOrDefault ¶

func (w *Workflow) MemoryPolicyOrDefault() MemoryPolicy

MemoryPolicyOrDefault returns the configured MemoryPolicy or the default.

func (*Workflow) SetMemoryPolicy ¶

func (w *Workflow) SetMemoryPolicy(policy MemoryPolicy)

SetMemoryPolicy sets the workflow-level MemoryPolicy used by the Memory Manager to decide cache actions at stage transitions. If not set, the default policy (preserve on small increment + flush at boundary) is used.

func (*Workflow) Stages ¶

func (w *Workflow) Stages() map[string]*Stage

Stages returns all DAG stages keyed by ID.

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL