Documentation
¶
Overview ¶
Package serving implements the Agentic Serving Layer (RFC 5).
Index ¶
- type CacheManager
- func (cm *CacheManager) FlushCache(ctx context.Context, serverManager *LLMServerManager, serverName string) error
- func (cm *CacheManager) GetMemoryPressure(ctx context.Context, serverManager *LLMServerManager, serverName string) float64
- func (cm *CacheManager) GetOrCreateCacheState(serverName string) *CacheState
- func (cm *CacheManager) MarkFlushed(serverName string)
- func (cm *CacheManager) MarkPreserved(serverName string)
- func (cm *CacheManager) ShouldFlush(ctx context.Context, serverName string, turnSize int, memoryPressure float64, ...) (bool, error)
- type CachePolicyEvaluator
- type CacheState
- type ContextManager
- type ExecuteTaskOptions
- type HealthStatus
- type LLMServerManager
- func (m *LLMServerManager) GetCacheController(serverName string) (model.CacheController, error)
- func (m *LLMServerManager) GetHealthStatus(ctx context.Context, serverName string) (HealthStatus, error)
- func (m *LLMServerManager) GetProvider(ctx context.Context, serverName string) (model.Provider, error)
- func (m *LLMServerManager) GetServerConfig(name string) (*config.LLMServerConfig, error)
- func (m *LLMServerManager) ListServers() []string
- type Layer
- func (l *Layer) ExecuteTask(ctx context.Context, execution *WorkflowExecution, taskIndex int, ...) (*model.Response, error)
- func (l *Layer) GetAgentProfile(profileName string) (*config.AgentProfile, bool)
- func (l *Layer) GetExecution(executionID string) (*WorkflowExecution, error)
- func (l *Layer) GetProvider(ctx context.Context, profileName string, task *config.WorkflowTask) (model.Provider, error)
- func (l *Layer) GetSharedContext(serverName string) *SharedContext
- func (l *Layer) StartWorkflow(ctx context.Context, workflowName string) (*WorkflowExecution, error)
- type ServingLayer
- type SharedContext
- type WorkflowExecution
- type WorkflowExecutionState
- type WorkflowExecutor
- func (e *WorkflowExecutor) AdvanceTask(executionID string) error
- func (e *WorkflowExecutor) GetExecution(executionID string) (*WorkflowExecution, error)
- func (e *WorkflowExecutor) GetWorkflow(name string) (*config.Workflow, error)
- func (e *WorkflowExecutor) IsComplete(executionID string) (bool, error)
- func (e *WorkflowExecutor) StartWorkflow(ctx context.Context, workflowName string) (*WorkflowExecution, error)
- func (e *WorkflowExecutor) UpdateExecutionState(executionID string, state WorkflowExecutionState) error
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type CacheManager ¶
type CacheManager struct {
// contains filtered or unexported fields
}
CacheManager tracks cache state and makes flush/preserve decisions
func NewCacheManager ¶
func NewCacheManager(evaluator *CachePolicyEvaluator) *CacheManager
NewCacheManager creates a new cache manager
func (*CacheManager) FlushCache ¶
func (cm *CacheManager) FlushCache(ctx context.Context, serverManager *LLMServerManager, serverName string) error
FlushCache actually flushes the cache for a server using its cache controller
func (*CacheManager) GetMemoryPressure ¶ added in v1.0.2
func (cm *CacheManager) GetMemoryPressure(ctx context.Context, serverManager *LLMServerManager, serverName string) float64
GetMemoryPressure queries the memory pressure for a server from its cache controller. Returns 0.0 if memory pressure cannot be determined.
func (*CacheManager) GetOrCreateCacheState ¶
func (cm *CacheManager) GetOrCreateCacheState(serverName string) *CacheState
GetOrCreateCacheState gets or creates a cache state for an LLM server. This method locks the mutex internally.
func (*CacheManager) MarkFlushed ¶
func (cm *CacheManager) MarkFlushed(serverName string)
MarkFlushed marks the cache as flushed for an LLM server
func (*CacheManager) MarkPreserved ¶
func (cm *CacheManager) MarkPreserved(serverName string)
MarkPreserved marks the cache as preserved for an LLM server
func (*CacheManager) ShouldFlush ¶
func (cm *CacheManager) ShouldFlush(ctx context.Context, serverName string, turnSize int, memoryPressure float64, isFinalIteration bool, workflowName string) (bool, error)
ShouldFlush determines if the cache should be flushed based on the policy. workflowName is optional and used by the preserve_within_workflow policy to detect workflow transitions.
type CachePolicyEvaluator ¶
type CachePolicyEvaluator struct {
// contains filtered or unexported fields
}
CachePolicyEvaluator evaluates cache policies based on various factors
func NewCachePolicyEvaluator ¶
func NewCachePolicyEvaluator(servers []*config.LLMServerConfig) *CachePolicyEvaluator
NewCachePolicyEvaluator creates a new cache policy evaluator
func (*CachePolicyEvaluator) EvaluateDecision ¶
func (e *CachePolicyEvaluator) EvaluateDecision(serverName string, turnSize int, memoryPressure float64, isFinalIteration bool, workflowName string, lastWorkflowName string) (bool, error)
EvaluateDecision evaluates whether to flush or preserve cache based on the policy. Returns true if the cache should be flushed, false if it should be preserved. workflowName is optional and used by the preserve_within_workflow policy to detect workflow transitions.
type CacheState ¶
type CacheState struct {
// ServerName is the name of the LLM server
ServerName string
// IsFlushed indicates whether the cache has been flushed
IsFlushed bool
// LastTurnSize is the size of the last turn (in tokens)
LastTurnSize int
// LastMemoryPressure is the last known memory pressure (0.0-1.0)
LastMemoryPressure float64
// LastWorkflowName is the name of the last workflow that used this cache
LastWorkflowName string
}
CacheState represents the state of a cache for an LLM server
type ContextManager ¶
type ContextManager struct {
// contains filtered or unexported fields
}
ContextManager coordinates context sharing across agents
func NewContextManager ¶
func NewContextManager() *ContextManager
NewContextManager creates a new context manager
func (*ContextManager) GetOrCreateSharedContext ¶
func (cm *ContextManager) GetOrCreateSharedContext(serverName string, syncInterval int) *SharedContext
GetOrCreateSharedContext gets or creates a shared context for an LLM server
func (*ContextManager) GetSharedContext ¶
func (cm *ContextManager) GetSharedContext(serverName string) *SharedContext
GetSharedContext gets a shared context for an LLM server (returns nil if not found)
type ExecuteTaskOptions ¶ added in v1.1.0
type ExecuteTaskOptions struct {
// MaxTokens is the maximum number of tokens to generate.
MaxTokens int
// Stream is whether to stream the response.
Stream bool
}
ExecuteTaskOptions are the additional options for executing a task
type HealthStatus ¶
type HealthStatus string
HealthStatus represents the health status of an LLM server
const (
	// HealthStatusHealthy indicates the server is healthy
	HealthStatusHealthy HealthStatus = "healthy"
	// HealthStatusDegraded indicates the server is degraded
	HealthStatusDegraded HealthStatus = "degraded"
	// HealthStatusUnavailable indicates the server is unavailable
	HealthStatusUnavailable HealthStatus = "unavailable"
)
type LLMServerManager ¶
type LLMServerManager struct {
// contains filtered or unexported fields
}
LLMServerManager manages a pool of LLM server configurations and their providers
func NewLLMServerManager ¶
func NewLLMServerManager(servers []*config.LLMServerConfig) *LLMServerManager
NewLLMServerManager creates a new LLM server manager
func (*LLMServerManager) GetCacheController ¶
func (m *LLMServerManager) GetCacheController(serverName string) (model.CacheController, error)
GetCacheController returns a cached cache controller for an LLM server, creating it if necessary
func (*LLMServerManager) GetHealthStatus ¶
func (m *LLMServerManager) GetHealthStatus(ctx context.Context, serverName string) (HealthStatus, error)
GetHealthStatus returns the health status of an LLM server. It performs actual health checks by calling the provider's EnsureReady method and measuring response times to determine health status.
func (*LLMServerManager) GetProvider ¶
func (m *LLMServerManager) GetProvider(ctx context.Context, serverName string) (model.Provider, error)
GetProvider returns a cached provider for an LLM server, creating it if necessary
func (*LLMServerManager) GetServerConfig ¶
func (m *LLMServerManager) GetServerConfig(name string) (*config.LLMServerConfig, error)
GetServerConfig returns the LLM server configuration for a given name
func (*LLMServerManager) ListServers ¶
func (m *LLMServerManager) ListServers() []string
ListServers returns a list of all server names
type Layer ¶
type Layer struct {
// contains filtered or unexported fields
}
Layer is the concrete implementation of ServingLayer
func NewLayer ¶
func NewLayer(cfg *config.AgenticServingConfig) (*Layer, error)
NewLayer creates a new serving layer
func (*Layer) ExecuteTask ¶
func (l *Layer) ExecuteTask(ctx context.Context, execution *WorkflowExecution, taskIndex int, prompt string, options ExecuteTaskOptions) (*model.Response, error)
ExecuteTask executes a single workflow task with the given prompt It handles shared context, cache policies, and returns the response
func (*Layer) GetAgentProfile ¶
func (l *Layer) GetAgentProfile(profileName string) (*config.AgentProfile, bool)
GetAgentProfile returns an agent profile by name
func (*Layer) GetExecution ¶
func (l *Layer) GetExecution(executionID string) (*WorkflowExecution, error)
GetExecution returns a workflow execution by ID
func (*Layer) GetProvider ¶
func (l *Layer) GetProvider(ctx context.Context, profileName string, task *config.WorkflowTask) (model.Provider, error)
GetProvider returns a model provider for an agent profile or workflow task
func (*Layer) GetSharedContext ¶
func (l *Layer) GetSharedContext(serverName string) *SharedContext
GetSharedContext returns shared context for a given LLM server
func (*Layer) StartWorkflow ¶
func (l *Layer) StartWorkflow(ctx context.Context, workflowName string) (*WorkflowExecution, error)
StartWorkflow initializes a workflow execution
type ServingLayer ¶
type ServingLayer interface {
// GetProvider returns a model provider for an agent profile or workflow task
// If task is nil, uses the agent profile's LLM server configuration
// If task is not nil and has an LLM server override, uses that instead
GetProvider(ctx context.Context, profileName string, task *config.WorkflowTask) (model.Provider, error)
// StartWorkflow initializes a workflow execution
StartWorkflow(ctx context.Context, workflowName string) (*WorkflowExecution, error)
// ExecuteTask executes a single workflow task with the given prompt and options
// Returns the response from the task execution (includes metrics when options.Stream is true)
ExecuteTask(ctx context.Context, execution *WorkflowExecution, taskIndex int, prompt string, options ExecuteTaskOptions) (*model.Response, error)
GetSharedContext(serverName string) *SharedContext
// GetExecution returns a workflow execution by ID
GetExecution(executionID string) (*WorkflowExecution, error)
}
ServingLayer is the core interface for the Agentic Serving Layer
type SharedContext ¶
type SharedContext struct {
ServerName string
Messages []model.Message
SyncInterval int
LastSyncTokenCount int
// contains filtered or unexported fields
}
SharedContext manages shared conversation context for multi-agent scenarios
func NewSharedContext ¶
func NewSharedContext(serverName string, syncInterval int) *SharedContext
NewSharedContext creates a new shared context
func (*SharedContext) AppendMessage ¶
func (sc *SharedContext) AppendMessage(msg model.Message)
AppendMessage appends a message to the shared context
func (*SharedContext) GetMessages ¶
func (sc *SharedContext) GetMessages() []model.Message
GetMessages returns a copy of all messages in the shared context
func (*SharedContext) ShouldSync ¶
func (sc *SharedContext) ShouldSync(currentTokenCount int) bool
ShouldSync checks if the context should be synchronized based on token count
func (*SharedContext) UpdateSyncTokenCount ¶
func (sc *SharedContext) UpdateSyncTokenCount(tokenCount int)
UpdateSyncTokenCount updates the token count for synchronization tracking
type WorkflowExecution ¶
type WorkflowExecution struct {
// ExecutionID is the unique identifier for this execution
ExecutionID string
// WorkflowName is the name of the workflow being executed
WorkflowName string
// CurrentTaskIndex is the index of the current task (0-based)
CurrentTaskIndex int
// Tasks is the list of tasks in the workflow
Tasks []*config.WorkflowTask
// State is the execution state
State WorkflowExecutionState
// Context holds the accumulated messages from previous tasks
// This is used when tasks have UseContext: true
Context []model.Message
}
WorkflowExecution represents an active workflow execution
type WorkflowExecutionState ¶
type WorkflowExecutionState string
WorkflowExecutionState represents the state of a workflow execution
const (
	// WorkflowExecutionStatePending indicates the workflow is pending execution
	WorkflowExecutionStatePending WorkflowExecutionState = "pending"
	// WorkflowExecutionStateRunning indicates the workflow is currently running
	WorkflowExecutionStateRunning WorkflowExecutionState = "running"
	// WorkflowExecutionStateCompleted indicates the workflow has completed
	WorkflowExecutionStateCompleted WorkflowExecutionState = "completed"
	// WorkflowExecutionStateFailed indicates the workflow has failed
	WorkflowExecutionStateFailed WorkflowExecutionState = "failed"
)
type WorkflowExecutor ¶
type WorkflowExecutor struct {
// contains filtered or unexported fields
}
WorkflowExecutor executes workflow sequences
func NewWorkflowExecutor ¶
func NewWorkflowExecutor(workflows []*config.Workflow) *WorkflowExecutor
NewWorkflowExecutor creates a new workflow executor
func (*WorkflowExecutor) AdvanceTask ¶
func (e *WorkflowExecutor) AdvanceTask(executionID string) error
AdvanceTask advances to the next task in a workflow execution
func (*WorkflowExecutor) GetExecution ¶
func (e *WorkflowExecutor) GetExecution(executionID string) (*WorkflowExecution, error)
GetExecution returns a workflow execution by ID
func (*WorkflowExecutor) GetWorkflow ¶
func (e *WorkflowExecutor) GetWorkflow(name string) (*config.Workflow, error)
GetWorkflow returns a workflow configuration by name
func (*WorkflowExecutor) IsComplete ¶
func (e *WorkflowExecutor) IsComplete(executionID string) (bool, error)
IsComplete checks if a workflow execution is complete
func (*WorkflowExecutor) StartWorkflow ¶
func (e *WorkflowExecutor) StartWorkflow(ctx context.Context, workflowName string) (*WorkflowExecution, error)
StartWorkflow initializes a workflow execution
func (*WorkflowExecutor) UpdateExecutionState ¶
func (e *WorkflowExecutor) UpdateExecutionState(executionID string, state WorkflowExecutionState) error
UpdateExecutionState updates the state of a workflow execution