Documentation ¶
Overview ¶
Package serving implements a minimal programmatic serving layer.
Index ¶
- type AgenticLayer
- func (l *AgenticLayer) AddLLMBackend(name string, backend *core.LLMBackend, modelID string)
- func (l *AgenticLayer) Execute(ctx context.Context, serverName, stageName string, messages []model.Message, ...) (*model.Response, error)
- func (l *AgenticLayer) ExecuteStream(ctx context.Context, serverName, stageName string, messages []model.Message, ...) (*model.Response, <-chan model.StreamEvent, error)
- func (l *AgenticLayer) GetLLMBackendHealth(ctx context.Context, serverName string) (HealthStatus, error)
- func (l *AgenticLayer) GetModelProvider(ctx context.Context, backendName string) (model.Provider, error)
- func (l *AgenticLayer) ListLLMBackends() []string
- func (l *AgenticLayer) NotifyWorkflowComplete(ctx context.Context, workflowID string, backends []string)
- func (l *AgenticLayer) StartPressureMonitor(ctx context.Context)
- type ChatOptions
- type HealthStatus
- type LLMBackendManager
- func (m *LLMBackendManager) AddLLMBackend(name string, backend *core.LLMBackend, modelID string)
- func (m *LLMBackendManager) GetHealthStatus(ctx context.Context, backendName string) (HealthStatus, error)
- func (m *LLMBackendManager) GetModelID(backendName string) string
- func (m *LLMBackendManager) GetModelProvider(ctx context.Context, backendName string) (model.Provider, error)
- func (m *LLMBackendManager) ListLLMBackends() []string
- func (m *LLMBackendManager) ScheduleChat(ctx context.Context, backendName, stageName string, messages []model.Message, ...) (*model.Response, <-chan model.StreamEvent, error)
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type AgenticLayer ¶
type AgenticLayer struct {
MemoryManager *memory.DefaultManager
// contains filtered or unexported fields
}
AgenticLayer is the serving layer that manages LLM backends and executes inference.
func NewAgenticLayer ¶
func NewAgenticLayer() *AgenticLayer
NewAgenticLayer creates a new serving layer.
func (*AgenticLayer) AddLLMBackend ¶
func (l *AgenticLayer) AddLLMBackend(name string, backend *core.LLMBackend, modelID string)
AddLLMBackend registers an LLM backend by name.
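A minimal registration sketch. The core.LLMBackend construction, backend name, and model ID below are illustrative assumptions, not part of this package; consult package core for the backend's actual fields.

layer := serving.NewAgenticLayer()

// Hypothetical backend configuration; core.LLMBackend's fields are
// defined in package core and not shown here.
backend := &core.LLMBackend{ /* endpoint, credentials, ... */ }

// "local-llama" and the model ID are placeholder names.
layer.AddLLMBackend("local-llama", backend, "llama-3.1-8b-instruct")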
func (*AgenticLayer) Execute ¶
func (l *AgenticLayer) Execute(ctx context.Context, serverName, stageName string, messages []model.Message, tools []*mcp.Tool, opts model.InferenceOptions, chatOpts ...ChatOptions) (*model.Response, error)
Execute runs a single non-streaming inference call against the named LLM backend. For streaming, use ExecuteStream instead. opts.Stream must be false.
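A hedged sketch of a non-streaming call, reusing layer and ctx from the sketch above. The Role/Content fields on model.Message and the "default" stage name are assumptions (the latter follows the convention noted under ScheduleChat below).

msgs := []model.Message{{Role: "user", Content: "Summarize the attached log."}}

// Execute requires a non-streaming configuration.
opts := model.InferenceOptions{Stream: false}

resp, err := layer.Execute(ctx, "local-llama", "default", msgs, nil, opts)
if err != nil {
	log.Fatalf("inference failed: %v", err)
}
fmt.Printf("%+v\n", resp)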
func (*AgenticLayer) ExecuteStream ¶
func (l *AgenticLayer) ExecuteStream(ctx context.Context, serverName, stageName string, messages []model.Message, tools []*mcp.Tool, opts model.InferenceOptions, chatOpts ...ChatOptions) (*model.Response, <-chan model.StreamEvent, error)
ExecuteStream runs inference with streaming. It returns the response (filled as the stream is consumed), a channel of stream events, and an error. The caller must consume the channel until closed; the response content, tool_calls, and metrics are populated by the provider's goroutine as the stream completes. opts.Stream must be true.
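The same call in streaming form, reusing msgs from above. The key obligation, per the doc comment, is draining the channel until it closes before reading the response.

opts := model.InferenceOptions{Stream: true} // ExecuteStream requires this

resp, events, err := layer.ExecuteStream(ctx, "local-llama", "default", msgs, nil, opts)
if err != nil {
	log.Fatalf("stream setup failed: %v", err)
}
for ev := range events {
	fmt.Printf("event: %+v\n", ev) // forward tokens as they arrive
}
// Only now are resp's content, tool calls, and metrics fully populated.
fmt.Printf("%+v\n", resp)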
func (*AgenticLayer) GetLLMBackendHealth ¶
func (l *AgenticLayer) GetLLMBackendHealth(ctx context.Context, serverName string) (HealthStatus, error)
GetLLMBackendHealth returns the health status of a named LLM backend.
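A sketch of a health sweep over every registered backend, using ListLLMBackends and the HealthStatus constants defined below.

for _, name := range layer.ListLLMBackends() {
	status, err := layer.GetLLMBackendHealth(ctx, name)
	if err != nil {
		log.Printf("%s: health probe failed: %v", name, err)
		continue
	}
	if status != serving.HealthStatusHealthy {
		log.Printf("%s is %s", name, status) // e.g. "degraded"
	}
}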
func (*AgenticLayer) GetModelProvider ¶
func (l *AgenticLayer) GetModelProvider(ctx context.Context, backendName string) (model.Provider, error)
GetModelProvider returns the model provider for a named LLM backend.
func (*AgenticLayer) ListLLMBackends ¶
func (l *AgenticLayer) ListLLMBackends() []string
ListLLMBackends returns all registered LLM backend names.
func (*AgenticLayer) NotifyWorkflowComplete ¶
func (l *AgenticLayer) NotifyWorkflowComplete(ctx context.Context, workflowID string, backends []string)
NotifyWorkflowComplete emits TransitionWorkflowComplete signals for each backend the workflow used, then deregisters the workflow from the tracker.
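A sketch of the intended call site, with an illustrative workflow ID and backend list; typically invoked (or deferred) once the workflow's last stage finishes.

defer layer.NotifyWorkflowComplete(ctx, "wf-1234", []string{"local-llama"})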
func (*AgenticLayer) StartPressureMonitor ¶
func (l *AgenticLayer) StartPressureMonitor(ctx context.Context)
StartPressureMonitor launches the background memory pressure polling loop. It dynamically queries the current set of backends on each tick and stops when ctx is cancelled.
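A usage sketch tying the monitor to the process lifetime. Whether the call blocks or spawns internally is not stated here, so this sketch defensively runs it on its own goroutine.

ctx, cancel := context.WithCancel(context.Background())
defer cancel() // cancelling ctx stops the polling loop

go layer.StartPressureMonitor(ctx)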
type ChatOptions ¶
ChatOptions carries optional metadata for a scheduled chat request.
type HealthStatus ¶
type HealthStatus string
HealthStatus represents the health status of an LLM backend.
const (
	HealthStatusHealthy  HealthStatus = "healthy"
	HealthStatusDegraded HealthStatus = "degraded"
)
type LLMBackendManager ¶
type LLMBackendManager struct {
// contains filtered or unexported fields
}
LLMBackendManager manages a pool of LLM backend configurations and their providers.
func NewLLMBackendManager ¶
func NewLLMBackendManager(mm *memory.DefaultManager) *LLMBackendManager
NewLLMBackendManager creates a new LLM backend manager.
func (*LLMBackendManager) AddLLMBackend ¶
func (m *LLMBackendManager) AddLLMBackend(name string, backend *core.LLMBackend, modelID string)
AddLLMBackend registers an LLM backend by name.
func (*LLMBackendManager) GetHealthStatus ¶
func (m *LLMBackendManager) GetHealthStatus(ctx context.Context, backendName string) (HealthStatus, error)
GetHealthStatus returns the health status of an LLM backend.
func (*LLMBackendManager) GetModelID ¶
func (m *LLMBackendManager) GetModelID(backendName string) string
GetModelID returns the modelID string for a registered backend, or "" if not found.
func (*LLMBackendManager) GetModelProvider ¶
func (m *LLMBackendManager) GetModelProvider(ctx context.Context, backendName string) (model.Provider, error)
GetModelProvider returns a cached provider for an LLM backend, creating it if necessary.
func (*LLMBackendManager) ListLLMBackends ¶
func (m *LLMBackendManager) ListLLMBackends() []string
ListLLMBackends returns all registered LLM backend names.
func (*LLMBackendManager) ScheduleChat ¶
func (m *LLMBackendManager) ScheduleChat(ctx context.Context, backendName, stageName string, messages []model.Message, tools []*mcp.Tool, opts model.InferenceOptions, chatOpts ...ChatOptions) (*model.Response, <-chan model.StreamEvent, error)
ScheduleChat queues a request for execution under the backend's scheduling policy. stageName identifies the stage queue inside the backend; an empty stageName uses "default".
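A sketch of scheduling through the manager directly, assuming mm is an existing *memory.DefaultManager and reusing the illustrative backend, msgs, and opts from the sketches above. The empty stage name falls back to "default" as noted.

mgr := serving.NewLLMBackendManager(mm)
mgr.AddLLMBackend("local-llama", backend, "llama-3.1-8b-instruct")

resp, events, err := mgr.ScheduleChat(ctx, "local-llama", "", msgs, nil, opts)
if err != nil {
	log.Fatalf("schedule failed: %v", err)
}
// If opts.Stream is true, drain events until closed before reading resp,
// as with ExecuteStream above.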
Directories ¶
| Path | Synopsis |
|---|---|
| api | Package api provides the HTTP API for the serving layer daemon. |
| memory | Package memory implements the Memory Manager for Orla's agentic serving layer. |
| metrics | Package metrics provides Prometheus metrics for the Orla serving layer. |