api

package
v1.2.10 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 23, 2026 License: MIT Imports: 13 Imported by: 0

Documentation

Overview

Package api provides the HTTP API for the serving layer daemon.

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type AgenticServer

// AgenticServer is the HTTP API server for the daemon.
// Its fields are unexported; this page shows NewAgenticServer as its
// constructor and Start / Shutdown as its methods.
type AgenticServer struct {
	// contains filtered or unexported fields
}

AgenticServer is the HTTP API server for the daemon.

func NewAgenticServer

func NewAgenticServer(layer *serving.AgenticLayer, listenAddress string, opts *ServerOptions) *AgenticServer

NewAgenticServer creates a new daemon API server.

func (*AgenticServer) Shutdown

func (s *AgenticServer) Shutdown(ctx context.Context) error

Shutdown gracefully shuts down the HTTP server.

func (*AgenticServer) Start

func (s *AgenticServer) Start() error

Start starts the HTTP server.

type Client

// Client is the HTTP client for communicating with the daemon.
// Its fields are unexported; this page shows NewClient as its constructor.
type Client struct {
	// contains filtered or unexported fields
}

Client is the HTTP client for communicating with the daemon.

func NewClient

func NewClient(baseURL string) *Client

NewClient creates a new daemon client.

func (*Client) Health

func (c *Client) Health(ctx context.Context) error

Health checks the health of the daemon.

type ExecuteRequest

// ExecuteRequest is the JSON request body for the execute endpoint.
//
// model.InferenceOptions is embedded rather than held in a named field, so
// its options (stream, max_tokens, temperature, top_p) appear at the top
// level of the JSON body instead of inside a nested object.
type ExecuteRequest struct {
	// Backend is the backend name, i.e. the name a backend was registered under.
	Backend  string          `json:"backend"`
	// StageID is optional and omitted from the JSON when empty.
	// NOTE(review): its meaning is not visible here — confirm against the handler.
	StageID  string          `json:"stage_id,omitempty"`
	// Prompt and Messages are both optional on the wire.
	// NOTE(review): presumably alternative ways of supplying the input —
	// confirm which takes precedence when both are set.
	Prompt   string          `json:"prompt,omitempty"`
	Messages []model.Message `json:"messages,omitempty"`
	// Tools is optional and omitted from the JSON when the slice is empty.
	Tools    []*mcp.Tool     `json:"tools,omitempty"`
	// Embedded so the inference options stay flat in the JSON body.
	model.InferenceOptions

	// WorkflowID and CachePolicy are optional and omitted when empty.
	// NOTE(review): accepted CachePolicy values are not visible here — confirm.
	WorkflowID  string `json:"workflow_id,omitempty"`
	CachePolicy string `json:"cache_policy,omitempty"`
}

ExecuteRequest is the request body for the execute endpoint. Inference options (stream, max_tokens, temperature, top_p) are embedded so the JSON body stays flat.

type ExecuteResponse

// ExecuteResponse is the JSON response body for the execute endpoint.
type ExecuteResponse struct {
	// Success is always present in the encoded body (no omitempty).
	Success  bool            `json:"success"`
	// Response is omitted from the JSON when nil.
	Response *model.Response `json:"response,omitempty"`
	// Error is omitted from the JSON when empty.
	// NOTE(review): presumably populated only when Success is false — confirm.
	Error    string          `json:"error,omitempty"`
}

ExecuteResponse is the response body for the execute endpoint.

type ListBackendsResponse

// ListBackendsResponse is the JSON response body for list backends.
type ListBackendsResponse struct {
	// Backends has no omitempty, so the "backends" key is always emitted;
	// a nil slice encodes as null rather than [].
	// NOTE(review): presumably the registered backend names — confirm.
	Backends []string `json:"backends"`
}

ListBackendsResponse is the response body for list backends.

type RegisterBackendRequest

// RegisterBackendRequest is the JSON request body for registering an LLM
// backend.
//
// Name, Endpoint, Type and ModelID are always encoded. The remaining fields
// carry omitempty, so an empty string or a zero int is left out of the body.
type RegisterBackendRequest struct {
	Name           string `json:"name"`                      // backend name (used as "backend" in execute requests)
	Endpoint       string `json:"endpoint"`                  // e.g. "http://vllm:8000/v1", "http://localhost:11434"
	Type           string `json:"type"`                      // "openai" or "sglang"
	ModelID        string `json:"model_id"`                  // full model identifier e.g. "openai:Qwen/Qwen3-4B-Instruct-2507", "openai:llama3"
	APIKeyEnvVar   string `json:"api_key_env_var,omitempty"` // optional env var name for API key (for openai-type backends)
	MaxConcurrency int    `json:"max_concurrency,omitempty"` // max concurrent requests to this backend (default 1)
	QueueCapacity  int    `json:"queue_capacity,omitempty"`  // max queued requests; 0 = default (4096)
}

RegisterBackendRequest is the request body for registering an LLM backend.

type RegisterBackendResponse

// RegisterBackendResponse is the JSON response body for register backend.
type RegisterBackendResponse struct {
	// Success is always present in the encoded body (no omitempty).
	Success bool   `json:"success"`
	// Error is omitted from the JSON when empty.
	// NOTE(review): presumably populated only when Success is false — confirm.
	Error   string `json:"error,omitempty"`
}

RegisterBackendResponse is the response body for register backend.

type ServerOptions

// ServerOptions configures the API server. It is passed to NewAgenticServer
// as a pointer.
type ServerOptions struct {
	// RateLimitRPS is the requests-per-second limit applied to the execute
	// and backends endpoints. 0 = disabled.
	RateLimitRPS int
}

ServerOptions configures the API server.

type WorkflowCompleteRequest

// WorkflowCompleteRequest is the JSON request body for the workflow/complete
// endpoint. Neither field carries omitempty, so both keys are always encoded.
type WorkflowCompleteRequest struct {
	// WorkflowID uses the same "workflow_id" JSON key as ExecuteRequest.
	WorkflowID string   `json:"workflow_id"`
	// NOTE(review): presumably the backend names involved in the workflow —
	// confirm against the handler. A nil slice encodes as null.
	Backends   []string `json:"backends"`
}

WorkflowCompleteRequest is the request body for the workflow/complete endpoint.

type WorkflowCompleteResponse

// WorkflowCompleteResponse is the JSON response body for the
// workflow/complete endpoint.
type WorkflowCompleteResponse struct {
	// Success is always present in the encoded body (no omitempty).
	Success bool   `json:"success"`
	// Error is omitted from the JSON when empty.
	// NOTE(review): presumably populated only when Success is false — confirm.
	Error   string `json:"error,omitempty"`
}

WorkflowCompleteResponse is the response body for the workflow/complete endpoint.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL