Documentation
¶
Overview ¶
Package llm provides internal representations of LLM inference API requests and responses, which are then further mutated and handled.
Index ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type ChatRequest ¶
type ChatRequest struct {
// Model name (e.g., "gpt-4", "claude-3-sonnet", "llama2")
Model string `json:"model"`
// Conversation messages
Messages []Message `json:"messages"`
// Whether to stream the response
Stream *bool `json:"stream,omitempty"`
// System prompt (some providers handle this separately from messages)
System string `json:"system,omitempty"`
// Generation parameters (unified across providers)
MaxTokens *int `json:"max_tokens,omitempty"`
Temperature *float64 `json:"temperature,omitempty"`
TopP *float64 `json:"top_p,omitempty"`
TopK *int `json:"top_k,omitempty"`
Stop []string `json:"stop,omitempty"`
Seed *int `json:"seed,omitempty"`
// Provider-specific fields that don't map to common parameters
Extra map[string]any `json:"extra,omitempty"`
// RawRequest preserves the original request payload for cases where
// parsing is incomplete or for debugging.
RawRequest json.RawMessage `json:"raw_request,omitempty"`
}
ChatRequest represents a provider-agnostic chat completion request. This is the internal representation used by the proxy after parsing provider-specific request formats.
type ChatResponse ¶
type ChatResponse struct {
// Model that generated the response
Model string `json:"model"`
// Response timestamp
CreatedAt time.Time `json:"created_at,omitzero"`
// The assistant's response message
Message Message `json:"message"`
// Whether generation is complete (for streaming)
Done bool `json:"done"`
// Stop reason (e.g., "stop", "length", "tool_use", "end_turn")
StopReason string `json:"stop_reason,omitempty"`
// Token usage and timing metrics
Usage *Usage `json:"usage,omitempty"`
// Provider-specific fields that don't map to common parameters
Extra map[string]any `json:"extra,omitempty"`
// RawResponse preserves the original response payload for cases where
// parsing is incomplete or for debugging.
RawResponse json.RawMessage `json:"raw_response,omitempty"`
}
ChatResponse represents a provider-agnostic chat completion response. This is the internal representation used by the proxy after parsing provider-specific response formats.
type ContentBlock ¶
type ContentBlock struct {
Type string `json:"type"` // "text", "image", "tool_use", "tool_result"
// Text content (type="text")
Text string `json:"text,omitempty"`
// Image content (type="image")
ImageURL string `json:"image_url,omitempty"` // URL to image
ImageBase64 string `json:"image_base64,omitempty"` // Base64-encoded image data
MediaType string `json:"media_type,omitempty"` // MIME type (e.g., "image/png")
// Tool use (type="tool_use") - assistant requesting tool execution
ToolUseID string `json:"tool_use_id,omitempty"`
ToolName string `json:"tool_name,omitempty"`
ToolInput map[string]any `json:"tool_input,omitempty"`
// Tool result (type="tool_result") - result from tool execution
ToolResultID string `json:"tool_result_id,omitempty"` // References the tool_use_id
ToolOutput string `json:"tool_output,omitempty"`
IsError bool `json:"is_error,omitempty"`
}
ContentBlock represents a single piece of content within a message. The Type field determines which other fields are populated.
type ConversationTurn ¶
type ConversationTurn struct {
Provider string `json:"provider"`
Request *ChatRequest `json:"request"`
Response *ChatResponse `json:"response"`
}
ConversationTurn represents a complete request-response pair for storage in the DAG.
type ErrorResponse ¶
type ErrorResponse struct {
Error string `json:"error"`
}
ErrorResponse represents an error from the LLM API.
type Message ¶
type Message struct {
Role string `json:"role"` // "system", "user", "assistant", "tool"
Content []ContentBlock `json:"content"` // Array of content blocks
}
Message represents a single message in a conversation. Content is stored as an array of ContentBlocks to support multimodal content (text, images, tool use, etc.) in a provider-agnostic way.
func NewTextMessage ¶
NewTextMessage creates a simple text message with the given role and content.
type StreamChunk ¶
type StreamChunk struct {
// Model that generated the chunk
Model string `json:"model"`
// Chunk timestamp
CreatedAt time.Time `json:"created_at,omitzero"`
// The content of this chunk (typically a partial message)
Message Message `json:"message"`
// Whether this is the final chunk
Done bool `json:"done"`
// Index for providers that support multiple parallel completions
Index int `json:"index,omitempty"`
// Stop reason (only present on final chunk)
StopReason string `json:"stop_reason,omitempty"`
// Usage metrics (typically only present on final chunk)
Usage *Usage `json:"usage,omitempty"`
}
StreamChunk represents a single chunk in a streaming response. This is the internal representation used by the proxy after parsing provider-specific streaming formats.
type Usage ¶
type Usage struct {
// Token counts
PromptTokens int `json:"prompt_tokens,omitempty"`
CompletionTokens int `json:"completion_tokens,omitempty"`
TotalTokens int `json:"total_tokens,omitempty"`
// Cache token counts (Anthropic prompt caching)
CacheCreationInputTokens int `json:"cache_creation_input_tokens,omitempty"`
CacheReadInputTokens int `json:"cache_read_input_tokens,omitempty"`
// Timing (provider-specific, but normalized to nanoseconds where possible)
TotalDurationNs int64 `json:"total_duration_ns,omitempty"`
PromptDurationNs int64 `json:"prompt_duration_ns,omitempty"`
}
Usage contains token counts and timing information.