Documentation
¶
Overview ¶
Package api provides the HTTP API for the serving layer daemon.
Index ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type AgenticServer ¶
type AgenticServer struct {
// contains filtered or unexported fields
}
AgenticServer is the HTTP API server for the daemon.
func NewAgenticServer ¶
func NewAgenticServer(layer *serving.AgenticLayer, listenAddress string, opts *ServerOptions) *AgenticServer
NewAgenticServer creates a new daemon API server.
type Client ¶
type Client struct {
// contains filtered or unexported fields
}
Client is the HTTP client for communicating with the daemon.
type ExecuteRequest ¶
type ExecuteRequest struct {
Backend string `json:"backend"`
StageID string `json:"stage_id,omitempty"`
Prompt string `json:"prompt,omitempty"`
Messages []model.Message `json:"messages,omitempty"`
Tools []*mcp.Tool `json:"tools,omitempty"`
model.InferenceOptions
WorkflowID string `json:"workflow_id,omitempty"`
CachePolicy string `json:"cache_policy,omitempty"`
}
ExecuteRequest is the request body for the execute endpoint. Inference options (stream, max_tokens, temperature, top_p) are embedded so the JSON body stays flat.
type ExecuteResponse ¶
type ExecuteResponse struct {
Success bool `json:"success"`
Response *model.Response `json:"response,omitempty"`
Error string `json:"error,omitempty"`
}
ExecuteResponse is the response body for the execute endpoint.
type ListBackendsResponse ¶
type ListBackendsResponse struct {
Backends []string `json:"backends"`
}
ListBackendsResponse is the response body for the list backends endpoint.
type RegisterBackendRequest ¶
type RegisterBackendRequest struct {
Name string `json:"name"` // backend name (used as "backend" in execute requests)
Endpoint string `json:"endpoint"` // e.g. "http://vllm:8000/v1", "http://localhost:11434"
Type string `json:"type"` // "openai" or "sglang"
ModelID string `json:"model_id"` // full model identifier e.g. "openai:Qwen/Qwen3-4B-Instruct-2507", "openai:llama3"
APIKeyEnvVar string `json:"api_key_env_var,omitempty"` // optional env var name for API key (for openai-type backends)
MaxConcurrency int `json:"max_concurrency,omitempty"` // max concurrent requests to this backend (default 1)
QueueCapacity int `json:"queue_capacity,omitempty"` // max queued requests; 0 = default (4096)
}
RegisterBackendRequest is the request body for registering an LLM backend.
type RegisterBackendResponse ¶
type RegisterBackendResponse struct {
Success bool `json:"success"`
Error string `json:"error,omitempty"`
}
RegisterBackendResponse is the response body for the register backend endpoint.
type ServerOptions ¶
type ServerOptions struct {
// RateLimitRPS limits requests per second for execute and backends. 0 = disabled.
RateLimitRPS int
}
ServerOptions configures the API server.
type WorkflowCompleteRequest ¶
type WorkflowCompleteRequest struct {
WorkflowID string `json:"workflow_id"`
Backends []string `json:"backends"`
}
WorkflowCompleteRequest is the request body for the workflow/complete endpoint.
type WorkflowCompleteResponse ¶
type WorkflowCompleteResponse struct {
Success bool `json:"success"`
Error string `json:"error,omitempty"`
}
WorkflowCompleteResponse is the response body for the workflow/complete endpoint.