Documentation
¶
Overview ¶
Package selfhosted implements the SelfHosted LLM provider for routing chat requests to operator-run inference servers (llama-server, vLLM, Ollama, etc.) via the OpenAI-compatible Chat Completions API.
Index ¶
- type ChatCompletionRequest
- type ChatCompletionResponse
- type Choice
- type Config
- type EmbeddingData
- type EmbeddingRequest
- type EmbeddingResponse
- type FunctionCall
- type FunctionDef
- type JsonSchemaSpec
- type Message
- type ModelEndpoint
- type ModelInfo
- type ModelList
- type Provider
- func (p *Provider) AvailableModels() []llmproxy.ModelInfo
- func (p *Provider) Chat(ctx context.Context, req *llmproxy.ChatRequest) (*llmproxy.ChatResponse, error)
- func (p *Provider) DeleteFile(_ context.Context, _ llmproxy.FileID) (bool, error)
- func (p *Provider) Embed(ctx context.Context, req *llmproxy.EmbeddingRequest) (*llmproxy.EmbeddingResponse, error)
- func (p *Provider) EmbeddingDimension(model llmproxy.ModelID) (int, error)
- func (p *Provider) Health(ctx context.Context) error
- func (p *Provider) ProviderID() llmproxy.ProviderID
- func (p *Provider) UploadFile(_ context.Context, _ *llmproxy.FileUploadRequest) (*llmproxy.FileUploadResponse, error)
- type ResponseFormat
- type ResponseMessage
- type Tool
- type ToolCall
- type Usage
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type ChatCompletionRequest ¶
// ChatCompletionRequest is the JSON body of a chat completion request.
type ChatCompletionRequest struct {
// Model and Messages carry no omitempty, so both keys are always serialized.
Model string `json:"model"`
Messages []Message `json:"messages"`
// Tools is omitted from the JSON when the slice is empty or nil.
Tools []Tool `json:"tools,omitempty"`
// ResponseFormat is omitted from the JSON when nil.
ResponseFormat *ResponseFormat `json:"response_format,omitempty"`
// The remaining optional fields are pointers combined with omitempty:
// nil drops the key entirely, while a pointer to an explicit zero value
// is still serialized.
Temperature *float32 `json:"temperature,omitempty"`
TopP *float32 `json:"top_p,omitempty"`
MaxTokens *int `json:"max_tokens,omitempty"`
FrequencyPenalty *float32 `json:"frequency_penalty,omitempty"`
PresencePenalty *float32 `json:"presence_penalty,omitempty"`
}
ChatCompletionRequest represents a chat completion request.
type ChatCompletionResponse ¶
// ChatCompletionResponse is the JSON shape of a chat completion response.
type ChatCompletionResponse struct {
// ID maps to the "id" key of the response.
ID string `json:"id"`
// Choices maps to the "choices" array; see Choice for the element shape.
Choices []Choice `json:"choices"`
// Usage is a value, not a pointer, so it is left at its zero value when
// the "usage" key is absent from the decoded JSON.
Usage Usage `json:"usage"`
}
ChatCompletionResponse represents a chat completion response.
type Choice ¶
// Choice is one element of ChatCompletionResponse.Choices.
type Choice struct {
// Index maps to the "index" key of the choice.
Index int `json:"index"`
// Message maps to the "message" object of the choice.
Message ResponseMessage `json:"message"`
// FinishReason maps to "finish_reason".
// NOTE(review): the set of possible values is not defined here — confirm
// against the servers this provider targets.
FinishReason string `json:"finish_reason"`
}
Choice represents a single completion choice.
type Config ¶
// Config is the typed configuration for the provider.
type Config struct {
// Endpoints lists the configured inference server endpoints, one
// ModelEndpoint per entry. The field has no struct tags.
Endpoints []ModelEndpoint
}
Config is the typed configuration for the SelfHosted provider. One Config describes any number of endpoints (e.g. llama-server containers), each hosting one model with its declared capabilities.
type EmbeddingData ¶
EmbeddingData represents a single embedding vector in the response.
type EmbeddingRequest ¶
EmbeddingRequest represents a request to the /v1/embeddings endpoint.
type EmbeddingResponse ¶
// EmbeddingResponse is the JSON shape of a /v1/embeddings response.
type EmbeddingResponse struct {
// Data maps to the "data" array; each element is an EmbeddingData.
Data []EmbeddingData `json:"data"`
// Model maps to the "model" key of the response.
Model string `json:"model"`
// Usage reuses the same Usage type as ChatCompletionResponse.
Usage Usage `json:"usage"`
}
EmbeddingResponse represents a response from the /v1/embeddings endpoint.
type FunctionCall ¶
// FunctionCall is the function invocation carried inside a ToolCall.
type FunctionCall struct {
// Name maps to the "name" key of the function call.
Name string `json:"name"`
// Arguments is kept as raw JSON rather than decoded eagerly: per the
// package documentation, llama-server may send it either as a JSON string
// (OpenAI-compliant) or as a JSON object, so consumers must handle both.
Arguments json.RawMessage `json:"arguments"`
}
FunctionCall represents the function invocation within a tool call. Arguments is json.RawMessage because llama-server may return it as either a JSON string (OpenAI-compliant) or a JSON object (llama-server bug #20198).
type FunctionDef ¶
// FunctionDef is the schema of a function tool, embedded in Tool.
type FunctionDef struct {
// Name has no omitempty, so the "name" key is always serialized.
Name string `json:"name"`
// Description is omitted from the JSON when nil.
Description *string `json:"description,omitempty"`
// Parameters is omitted from the JSON when the map is nil or empty.
// NOTE(review): presumably a JSON Schema object — confirm against callers.
Parameters map[string]any `json:"parameters,omitempty"`
// Strict is a pointer so that nil drops the key while an explicit false
// is still serialized.
Strict *bool `json:"strict,omitempty"`
}
FunctionDef defines a function tool's schema.
type JsonSchemaSpec ¶
// JsonSchemaSpec is the JSON schema payload of a structured-output
// ResponseFormat.
type JsonSchemaSpec struct {
// Name has no omitempty, so the "name" key is always serialized.
Name string `json:"name"`
// Schema has no omitempty either; a nil map is serialized as JSON null.
Schema map[string]any `json:"schema"`
// Description is omitted from the JSON when nil.
Description *string `json:"description,omitempty"`
// Strict is a pointer so that nil drops the key while an explicit false
// is still serialized.
Strict *bool `json:"strict,omitempty"`
}
JsonSchemaSpec defines a JSON schema for structured output.
type Message ¶
// Message is one entry of ChatCompletionRequest.Messages.
type Message struct {
// Role maps to the "role" key and is always serialized.
Role string `json:"role"`
// Content is a plain string without omitempty, so "content" is always
// serialized, as "" when empty.
Content string `json:"content"`
// Name is omitted from the JSON when nil.
Name *string `json:"name,omitempty"`
// ToolCalls is omitted from the JSON when the slice is empty or nil.
ToolCalls []ToolCall `json:"tool_calls,omitempty"`
// ToolCallID is omitted from the JSON when nil.
// NOTE(review): presumably set on messages that answer a tool call —
// confirm against the request-building code.
ToolCallID *string `json:"tool_call_id,omitempty"`
}
Message represents a chat message.
type ModelEndpoint ¶ added in v1.0.2
// ModelEndpoint describes a single inference server URL and the model it
// serves. None of the fields carry struct tags.
type ModelEndpoint struct {
// URL is the inference server URL for this endpoint.
URL string
// APIKey is the credential associated with this endpoint.
// NOTE(review): whether an empty value disables authentication is not
// visible here — confirm in the request code.
APIKey string
// ID is the model identifier served by this endpoint.
ID llmproxy.ModelID
// Label is a string label for the model.
// NOTE(review): presumably a human-readable display name — confirm.
Label string
// Capabilities lists the capabilities declared for the model.
Capabilities []llmproxy.ModelCapability
Dimension *int // Only needed for embedding models
// NOTE(review): TokenModifier's meaning is not evident from this
// declaration — presumably a multiplier applied to token counts; confirm.
TokenModifier float64
}
ModelEndpoint describes a single inference server URL and the model it serves.
type ModelList ¶
// ModelList is the JSON shape of a /v1/models response.
type ModelList struct {
// Data maps to the "data" array; each element is a ModelInfo.
Data []ModelInfo `json:"data"`
}
ModelList represents the response from the /v1/models endpoint.
type Provider ¶
// Provider routes chat and embedding requests to operator-run inference
// server endpoints. Its fields are not exported, so the state cannot be set
// through a struct literal from outside the package.
type Provider struct {
// contains filtered or unexported fields
}
Provider routes chat and embedding requests to operator-run inference server endpoints.
func NewProvider ¶
NewProvider builds a Provider under the default "SelfHosted" id from a validated Config. Returns nil if the config has no models (defensive — Validate already rejects this).
func (*Provider) AvailableModels ¶
AvailableModels returns the static model list built at construction time.
func (*Provider) Chat ¶
func (p *Provider) Chat(ctx context.Context, req *llmproxy.ChatRequest) (*llmproxy.ChatResponse, error)
Chat sends a chat completion request to the endpoint configured for the request's model.
func (*Provider) DeleteFile ¶
DeleteFile is not supported by local inference servers.
func (*Provider) Embed ¶
func (p *Provider) Embed(ctx context.Context, req *llmproxy.EmbeddingRequest) (*llmproxy.EmbeddingResponse, error)
Embed generates embeddings by sending a request to the local /v1/embeddings endpoint.
func (*Provider) EmbeddingDimension ¶
EmbeddingDimension returns the declared output dimension for the given embedding model.
func (*Provider) ProviderID ¶
func (p *Provider) ProviderID() llmproxy.ProviderID
ProviderID returns the unique identifier of this provider.
func (*Provider) UploadFile ¶
func (p *Provider) UploadFile(_ context.Context, _ *llmproxy.FileUploadRequest) (*llmproxy.FileUploadResponse, error)
UploadFile is not supported by local inference servers.
type ResponseFormat ¶
// ResponseFormat is the "response_format" object of a
// ChatCompletionRequest.
type ResponseFormat struct {
// Type maps to the "type" key and is always serialized.
// NOTE(review): accepted values are not defined here — confirm against
// the target server's API.
Type string `json:"type"`
// JsonSchema is omitted from the JSON when nil.
JsonSchema *JsonSchemaSpec `json:"json_schema,omitempty"`
}
ResponseFormat specifies the desired output format.
type ResponseMessage ¶
// ResponseMessage is the "message" object of a Choice.
type ResponseMessage struct {
// Role maps to the "role" key of the message.
Role string `json:"role"`
// Content is a pointer so that a JSON null or absent "content" decodes to
// nil and stays distinguishable from an empty string; callers must
// nil-check before dereferencing.
Content *string `json:"content"`
// ToolCalls maps to "tool_calls"; it is omitted when empty on encode.
ToolCalls []ToolCall `json:"tool_calls,omitempty"`
}
ResponseMessage represents the assistant's response message.
type Tool ¶
// Tool is one entry of ChatCompletionRequest.Tools.
type Tool struct {
// Type maps to the "type" key and is always serialized.
// NOTE(review): presumably "function" for function tools — confirm.
Type string `json:"type"`
// Function is a value, so the "function" object is always serialized.
Function FunctionDef `json:"function"`
}
Tool represents a tool definition.
type ToolCall ¶
// ToolCall is a tool invocation, used by both Message.ToolCalls and
// ResponseMessage.ToolCalls.
type ToolCall struct {
// ID maps to the "id" key of the tool call.
ID string `json:"id"`
// Type maps to the "type" key of the tool call.
Type string `json:"type"`
// Function holds the invoked function's name and raw JSON arguments.
Function FunctionCall `json:"function"`
}
ToolCall represents a tool call made by the model.