Documentation
¶
Overview ¶
Package serving implements the Agentic Serving Layer (RFC 5).
Index ¶
- type CacheManager
- func (cm *CacheManager) FlushCache(ctx context.Context, serverManager *LLMServerManager, serverName string) error
- func (cm *CacheManager) GetMemoryPressure(ctx context.Context, serverManager *LLMServerManager, serverName string) float64
- func (cm *CacheManager) GetOrCreateCacheState(serverName string) *CacheState
- func (cm *CacheManager) MarkFlushed(serverName string)
- func (cm *CacheManager) MarkPreserved(serverName string)
- func (cm *CacheManager) ShouldFlush(ctx context.Context, serverName string, turnSize int, memoryPressure float64, ...) (bool, error)
- type CachePolicyEvaluator
- type CacheState
- type ContextManager
- type ExecuteTaskOptions
- type HealthStatus
- type LLMServerManager
- func (m *LLMServerManager) GetCacheController(serverName string) (model.CacheController, error)
- func (m *LLMServerManager) GetHealthStatus(ctx context.Context, serverName string) (HealthStatus, error)
- func (m *LLMServerManager) GetProvider(ctx context.Context, serverName string) (model.Provider, error)
- func (m *LLMServerManager) GetServerConfig(name string) (*config.LLMServerConfig, error)
- func (m *LLMServerManager) ListServers() []string
- type Layer
- func (l *Layer) ExecuteTask(ctx context.Context, execution *WorkflowExecution, taskIndex int, ...) (*model.Response, error)
- func (l *Layer) GetAgentProfile(profileName string) (*config.AgentProfile, bool)
- func (l *Layer) GetExecution(executionID string) (*WorkflowExecution, error)
- func (l *Layer) GetProvider(ctx context.Context, profileName string, task *config.WorkflowTask) (model.Provider, error)
- func (l *Layer) GetSharedContext(serverName string) *SharedContext
- func (l *Layer) StartWorkflow(ctx context.Context, workflowName string) (*WorkflowExecution, error)
- type ServingLayer
- type SharedContext
- type WorkflowExecution
- type WorkflowExecutionState
- type WorkflowExecutor
- func (e *WorkflowExecutor) AdvanceTask(executionID string) error
- func (e *WorkflowExecutor) GetExecution(executionID string) (*WorkflowExecution, error)
- func (e *WorkflowExecutor) GetWorkflow(name string) (*config.Workflow, error)
- func (e *WorkflowExecutor) IsComplete(executionID string) (bool, error)
- func (e *WorkflowExecutor) StartWorkflow(ctx context.Context, workflowName string) (*WorkflowExecution, error)
- func (e *WorkflowExecutor) UpdateExecutionState(executionID string, state WorkflowExecutionState) error
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type CacheManager ¶
type CacheManager struct {
// contains filtered or unexported fields
}
CacheManager tracks cache state and makes flush/preserve decisions
func NewCacheManager ¶
func NewCacheManager(evaluator *CachePolicyEvaluator) *CacheManager
NewCacheManager creates a new cache manager
func (*CacheManager) FlushCache ¶
func (cm *CacheManager) FlushCache(ctx context.Context, serverManager *LLMServerManager, serverName string) error
FlushCache actually flushes the cache for a server using its cache controller
func (*CacheManager) GetMemoryPressure ¶ added in v1.0.2
func (cm *CacheManager) GetMemoryPressure(ctx context.Context, serverManager *LLMServerManager, serverName string) float64
GetMemoryPressure queries the memory pressure for a server from its cache controller. Returns 0.0 if memory pressure cannot be determined.
func (*CacheManager) GetOrCreateCacheState ¶
func (cm *CacheManager) GetOrCreateCacheState(serverName string) *CacheState
GetOrCreateCacheState gets or creates a cache state for an LLM server. This method locks the mutex internally.
func (*CacheManager) MarkFlushed ¶
func (cm *CacheManager) MarkFlushed(serverName string)
MarkFlushed marks the cache as flushed for an LLM server
func (*CacheManager) MarkPreserved ¶
func (cm *CacheManager) MarkPreserved(serverName string)
MarkPreserved marks the cache as preserved for an LLM server
func (*CacheManager) ShouldFlush ¶
func (cm *CacheManager) ShouldFlush(ctx context.Context, serverName string, turnSize int, memoryPressure float64, isFinalIteration bool, workflowName string) (bool, error)
ShouldFlush determines if the cache should be flushed based on the policy. workflowName is optional and used by the preserve_within_workflow policy to detect workflow transitions.
type CachePolicyEvaluator ¶
type CachePolicyEvaluator struct {
// contains filtered or unexported fields
}
CachePolicyEvaluator evaluates cache policies based on various factors
func NewCachePolicyEvaluator ¶
func NewCachePolicyEvaluator(servers []*config.LLMServerConfig) *CachePolicyEvaluator
NewCachePolicyEvaluator creates a new cache policy evaluator
func (*CachePolicyEvaluator) EvaluateDecision ¶
func (e *CachePolicyEvaluator) EvaluateDecision(serverName string, turnSize int, memoryPressure float64, isFinalIteration bool, workflowName string, lastWorkflowName string) (bool, error)
EvaluateDecision evaluates whether to flush or preserve cache based on the policy. Returns true if the cache should be flushed, false if it should be preserved. workflowName is optional and used by the preserve_within_workflow policy to detect workflow transitions.
type CacheState ¶
type CacheState struct {
// ServerName is the name of the LLM server
ServerName string
// IsFlushed indicates whether the cache has been flushed
IsFlushed bool
// LastTurnSize is the size of the last turn (in tokens)
LastTurnSize int
// LastMemoryPressure is the last known memory pressure (0.0-1.0)
LastMemoryPressure float64
// LastWorkflowName is the name of the last workflow that used this cache
LastWorkflowName string
}
CacheState represents the state of a cache for an LLM server
type ContextManager ¶
type ContextManager struct {
// contains filtered or unexported fields
}
ContextManager coordinates context sharing across agents
func NewContextManager ¶
func NewContextManager() *ContextManager
NewContextManager creates a new context manager
func (*ContextManager) GetOrCreateSharedContext ¶
func (cm *ContextManager) GetOrCreateSharedContext(serverName string, syncInterval int) *SharedContext
GetOrCreateSharedContext gets or creates a shared context for an LLM server
func (*ContextManager) GetSharedContext ¶
func (cm *ContextManager) GetSharedContext(serverName string) *SharedContext
GetSharedContext gets a shared context for an LLM server (returns nil if not found)
type ExecuteTaskOptions ¶ added in v1.1.0
type ExecuteTaskOptions struct {
// MaxTokens is the maximum number of tokens to generate.
MaxTokens int
// Stream is whether to stream the response.
Stream bool
}
ExecuteTaskOptions are the additional options for executing a task
type HealthStatus ¶
type HealthStatus string
HealthStatus represents the health status of an LLM server
const (
	// HealthStatusHealthy indicates the server is healthy
	HealthStatusHealthy HealthStatus = "healthy"
	// HealthStatusDegraded indicates the server is degraded
	HealthStatusDegraded HealthStatus = "degraded"
	// HealthStatusUnavailable indicates the server is unavailable
	HealthStatusUnavailable HealthStatus = "unavailable"
)
type LLMServerManager ¶
type LLMServerManager struct {
// contains filtered or unexported fields
}
LLMServerManager manages a pool of LLM server configurations and their providers
func NewLLMServerManager ¶
func NewLLMServerManager(servers []*config.LLMServerConfig) *LLMServerManager
NewLLMServerManager creates a new LLM server manager
func (*LLMServerManager) GetCacheController ¶
func (m *LLMServerManager) GetCacheController(serverName string) (model.CacheController, error)
GetCacheController returns a cached cache controller for an LLM server, creating it if necessary
func (*LLMServerManager) GetHealthStatus ¶
func (m *LLMServerManager) GetHealthStatus(ctx context.Context, serverName string) (HealthStatus, error)
GetHealthStatus returns the health status of an LLM server. It performs actual health checks by calling the provider's EnsureReady method and measuring response times to determine health status.
func (*LLMServerManager) GetProvider ¶
func (m *LLMServerManager) GetProvider(ctx context.Context, serverName string) (model.Provider, error)
GetProvider returns a cached provider for an LLM server, creating it if necessary
func (*LLMServerManager) GetServerConfig ¶
func (m *LLMServerManager) GetServerConfig(name string) (*config.LLMServerConfig, error)
GetServerConfig returns the LLM server configuration for a given name
func (*LLMServerManager) ListServers ¶
func (m *LLMServerManager) ListServers() []string
ListServers returns a list of all server names
type Layer ¶
type Layer struct {
// contains filtered or unexported fields
}
Layer is the concrete implementation of ServingLayer
func NewLayer ¶
func NewLayer(cfg *config.AgenticServingConfig) (*Layer, error)
NewLayer creates a new serving layer
func (*Layer) ExecuteTask ¶
func (l *Layer) ExecuteTask(ctx context.Context, execution *WorkflowExecution, taskIndex int, prompt string, options ExecuteTaskOptions) (*model.Response, error)
ExecuteTask executes a single workflow task with the given prompt It handles shared context, cache policies, and returns the response
func (*Layer) GetAgentProfile ¶
func (l *Layer) GetAgentProfile(profileName string) (*config.AgentProfile, bool)
GetAgentProfile returns an agent profile by name
func (*Layer) GetExecution ¶
func (l *Layer) GetExecution(executionID string) (*WorkflowExecution, error)
GetExecution returns a workflow execution by ID
func (*Layer) GetProvider ¶
func (l *Layer) GetProvider(ctx context.Context, profileName string, task *config.WorkflowTask) (model.Provider, error)
GetProvider returns a model provider for an agent profile or workflow task
func (*Layer) GetSharedContext ¶
func (l *Layer) GetSharedContext(serverName string) *SharedContext
GetSharedContext returns shared context for a given LLM server
func (*Layer) StartWorkflow ¶
func (l *Layer) StartWorkflow(ctx context.Context, workflowName string) (*WorkflowExecution, error)
StartWorkflow initializes a workflow execution
type ServingLayer ¶
type ServingLayer interface {
// GetProvider returns a model provider for an agent profile or workflow task
// If task is nil, uses the agent profile's LLM server configuration
// If task is not nil and has an LLM server override, uses that instead
GetProvider(ctx context.Context, profileName string, task *config.WorkflowTask) (model.Provider, error)
// StartWorkflow initializes a workflow execution
StartWorkflow(ctx context.Context, workflowName string) (*WorkflowExecution, error)
// ExecuteTask executes a single workflow task with the given prompt and options
// Returns the response from the task execution (includes metrics when options.Stream is true)
ExecuteTask(ctx context.Context, execution *WorkflowExecution, taskIndex int, prompt string, options ExecuteTaskOptions) (*model.Response, error)
GetSharedContext(serverName string) *SharedContext
// GetExecution returns a workflow execution by ID
GetExecution(executionID string) (*WorkflowExecution, error)
}
ServingLayer is the core interface for the Agentic Serving Layer
type SharedContext ¶
type SharedContext struct {
ServerName string
Messages []model.Message
SyncInterval int
LastSyncTokenCount int
// contains filtered or unexported fields
}
SharedContext manages shared conversation context for multi-agent scenarios
func NewSharedContext ¶
func NewSharedContext(serverName string, syncInterval int) *SharedContext
NewSharedContext creates a new shared context
func (*SharedContext) AppendMessage ¶
func (sc *SharedContext) AppendMessage(msg model.Message)
AppendMessage appends a message to the shared context
func (*SharedContext) GetMessages ¶
func (sc *SharedContext) GetMessages() []model.Message
GetMessages returns a copy of all messages in the shared context
func (*SharedContext) ShouldSync ¶
func (sc *SharedContext) ShouldSync(currentTokenCount int) bool
ShouldSync checks if the context should be synchronized based on token count
func (*SharedContext) UpdateSyncTokenCount ¶
func (sc *SharedContext) UpdateSyncTokenCount(tokenCount int)
UpdateSyncTokenCount updates the token count for synchronization tracking
type WorkflowExecution ¶
type WorkflowExecution struct {
// ExecutionID is the unique identifier for this execution
ExecutionID string
// WorkflowName is the name of the workflow being executed
WorkflowName string
// CurrentTaskIndex is the index of the current task (0-based)
CurrentTaskIndex int
// Tasks is the list of tasks in the workflow
Tasks []*config.WorkflowTask
// State is the execution state
State WorkflowExecutionState
// Context holds the accumulated messages from previous tasks
// This is used when tasks have UseContext: true
Context []model.Message
}
WorkflowExecution represents an active workflow execution
type WorkflowExecutionState ¶
type WorkflowExecutionState string
WorkflowExecutionState represents the state of a workflow execution
const (
	// WorkflowExecutionStatePending indicates the workflow is pending execution
	WorkflowExecutionStatePending WorkflowExecutionState = "pending"
	// WorkflowExecutionStateRunning indicates the workflow is currently running
	WorkflowExecutionStateRunning WorkflowExecutionState = "running"
	// WorkflowExecutionStateCompleted indicates the workflow has completed
	WorkflowExecutionStateCompleted WorkflowExecutionState = "completed"
	// WorkflowExecutionStateFailed indicates the workflow has failed
	WorkflowExecutionStateFailed WorkflowExecutionState = "failed"
)
type WorkflowExecutor ¶
type WorkflowExecutor struct {
// contains filtered or unexported fields
}
WorkflowExecutor executes workflow sequences
func NewWorkflowExecutor ¶
func NewWorkflowExecutor(workflows []*config.Workflow) *WorkflowExecutor
NewWorkflowExecutor creates a new workflow executor
func (*WorkflowExecutor) AdvanceTask ¶
func (e *WorkflowExecutor) AdvanceTask(executionID string) error
AdvanceTask advances to the next task in a workflow execution
func (*WorkflowExecutor) GetExecution ¶
func (e *WorkflowExecutor) GetExecution(executionID string) (*WorkflowExecution, error)
GetExecution returns a workflow execution by ID
func (*WorkflowExecutor) GetWorkflow ¶
func (e *WorkflowExecutor) GetWorkflow(name string) (*config.Workflow, error)
GetWorkflow returns a workflow configuration by name
func (*WorkflowExecutor) IsComplete ¶
func (e *WorkflowExecutor) IsComplete(executionID string) (bool, error)
IsComplete checks if a workflow execution is complete
func (*WorkflowExecutor) StartWorkflow ¶
func (e *WorkflowExecutor) StartWorkflow(ctx context.Context, workflowName string) (*WorkflowExecution, error)
StartWorkflow initializes a workflow execution
func (*WorkflowExecutor) UpdateExecutionState ¶
func (e *WorkflowExecutor) UpdateExecutionState(executionID string, state WorkflowExecutionState) error
UpdateExecutionState updates the state of a workflow execution