llmproxy

package
v0.47.0 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: May 3, 2026 License: MIT Imports: 23 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func GenerateGeminiRequestID added in v0.29.0

func GenerateGeminiRequestID() string

GenerateGeminiRequestID creates a new request ID with the "gr-" prefix.

func GenerateGeminiTraceID added in v0.29.0

func GenerateGeminiTraceID() string

GenerateGeminiTraceID creates a new trace ID with the "gt-" prefix.

func GenerateRequestID

func GenerateRequestID() string

GenerateRequestID creates a new request ID with the "ar-" prefix.

func GenerateTraceID

func GenerateTraceID() string

GenerateTraceID creates a new trace ID with the "at-" prefix.

func NewLogger

func NewLogger(level slog.Level) *slog.Logger

NewLogger creates a structured JSON logger for the LLM proxy.

func ParseNonStreamingResponse

func ParseNonStreamingResponse(body []byte) (model, msgID string, u anthropic.Usage, err error)

ParseNonStreamingResponse parses a complete JSON response from the Anthropic messages API.

func ParseStreamEvent

func ParseStreamEvent(data []byte) (eventType, model, msgID string, u anthropic.Usage, hasUsage bool)

ParseStreamEvent parses a single SSE data payload from an Anthropic streaming response. Returns the event type, model, message ID, usage, and whether usage data was found.

Types

type Config

type Config struct {
	ListenAddr         string // HTTP listen address, e.g. ":8081"
	DatabaseURL        string // proxy's own PostgreSQL connection URL
	AgentserverURL     string // agentserver internal API URL for token validation
	AnthropicBaseURL   string // upstream Anthropic API URL
	AnthropicAPIKey    string // real Anthropic API key
	AnthropicAuthToken string // alternative: Bearer token auth
	GeminiBaseURL      string // upstream Gemini API URL
	GeminiAPIKey       string // real Google API key for Gemini
	TraceHeader        string // custom trace header name
	DefaultMaxRPD      int    // default max requests per day per workspace (0 = unlimited)
}

Config holds all configuration for the LLM proxy.

func LoadConfigFromEnv

func LoadConfigFromEnv() Config

LoadConfigFromEnv reads configuration from environment variables.

type GeminiUsageMetadata added in v0.29.0

type GeminiUsageMetadata struct {
	PromptTokenCount        int64 `json:"promptTokenCount,omitempty"`
	CandidatesTokenCount    int64 `json:"candidatesTokenCount,omitempty"`
	CachedContentTokenCount int64 `json:"cachedContentTokenCount,omitempty"`
	TotalTokenCount         int64 `json:"totalTokenCount,omitempty"`
	ThoughtsTokenCount      int64 `json:"thoughtsTokenCount,omitempty"`
}

GeminiUsageMetadata holds token counts from a Gemini API response.

func ParseGeminiNonStreamingResponse added in v0.29.0

func ParseGeminiNonStreamingResponse(body []byte) (model string, usage GeminiUsageMetadata, err error)

ParseGeminiNonStreamingResponse parses a complete JSON response from Gemini generateContent.

func ParseGeminiStreamChunk added in v0.29.0

func ParseGeminiStreamChunk(data []byte) (model string, usage GeminiUsageMetadata, hasUsage bool, hasParts bool)

ParseGeminiStreamChunk parses a single SSE data payload from a Gemini streaming response. Returns model, usage, whether usage was present, and whether content parts were present.

type QueryOpts

type QueryOpts struct {
	WorkspaceID string
	SandboxID   string
	Since       time.Time
	Limit       int
	Offset      int
}

QueryOpts filters for usage/trace queries.

type Server

type Server struct {
	// contains filtered or unexported fields
}

Server is the LLM proxy HTTP server.

func NewServer

func NewServer(cfg Config, store *Store, logger *slog.Logger) *Server

NewServer creates a new LLM proxy server.

func (*Server) ExtractGeminiTraceID added in v0.29.0

func (s *Server) ExtractGeminiTraceID(r *http.Request, body []byte) (string, string)

ExtractGeminiTraceID extracts a trace ID from the request for Gemini. Same priority as ExtractTraceID but uses gt- prefix for auto-generated IDs.

func (*Server) ExtractTraceID

func (s *Server) ExtractTraceID(r *http.Request, body []byte) (string, string)

ExtractTraceID extracts a trace ID from the request. Priority: custom header → OpenCode/Claude Code session header → auto-generate. Returns (traceID, source).

func (*Server) Routes

func (s *Server) Routes() http.Handler

Routes returns the HTTP handler with all routes configured.

func (*Server) ValidateProxyToken

func (s *Server) ValidateProxyToken(ctx context.Context, proxyToken string) (*TokenInfo, error)

ValidateProxyToken calls the agentserver internal API to validate a proxy token (sandbox- or workspace-scoped). Returns nil (not error) if the token is invalid.

type Store

type Store struct {
	// contains filtered or unexported fields
}

Store wraps a *sql.DB for the LLM proxy's own database.

func NewStore

func NewStore(databaseURL string) (*Store, error)

NewStore connects to the proxy database and runs migrations.

func (*Store) Close

func (s *Store) Close() error

Close closes the underlying database connection.

func (*Store) CountTodayRequests added in v0.15.1

func (s *Store) CountTodayRequests(workspaceID string) (int64, error)

CountTodayRequests returns the number of LLM API requests for a workspace since the start of today (UTC).

func (*Store) DeleteWorkspaceQuota added in v0.15.1

func (s *Store) DeleteWorkspaceQuota(workspaceID string) error

DeleteWorkspaceQuota removes the quota override for a workspace.

func (*Store) GetOrCreateTrace

func (s *Store) GetOrCreateTrace(traceID, sandboxID, workspaceID, source string) (*Trace, error)

GetOrCreateTrace returns an existing trace or creates a new one. Uses INSERT ... ON CONFLICT to avoid TOCTOU races under concurrent requests.

func (*Store) GetTraceDetail

func (s *Store) GetTraceDetail(traceID string) (*Trace, []TokenUsage, error)

GetTraceDetail returns a trace and all its usage records.

func (*Store) GetWorkspaceQuota added in v0.15.1

func (s *Store) GetWorkspaceQuota(workspaceID string) (*WorkspaceQuota, error)

GetWorkspaceQuota returns the quota override for a workspace, or nil if none exists.

func (*Store) QueryTraces

func (s *Store) QueryTraces(opts QueryOpts) ([]TraceWithStats, int64, error)

QueryTraces returns traces with aggregated statistics and total count.

func (*Store) QueryUsage

func (s *Store) QueryUsage(opts QueryOpts) ([]UsageSummary, error)

QueryUsage returns aggregated usage grouped by provider and model.

func (*Store) RecordUsage

func (s *Store) RecordUsage(u TokenUsage) error

RecordUsage inserts a single API request usage record.

func (*Store) SetWorkspaceQuota added in v0.15.1

func (s *Store) SetWorkspaceQuota(workspaceID string, maxRPD *int) error

SetWorkspaceQuota upserts the quota override for a workspace.

func (*Store) UpdateTraceActivity

func (s *Store) UpdateTraceActivity(traceID string) error

UpdateTraceActivity updates the updated_at timestamp on a trace.

type TokenInfo added in v0.47.0

type TokenInfo struct {
	TokenType              string `json:"token_type"`
	SandboxID              string `json:"sandbox_id,omitempty"`
	WorkspaceID            string `json:"workspace_id"`
	Status                 string `json:"status"`
	ModelserverUpstreamURL string `json:"modelserver_upstream_url,omitempty"`
}

TokenInfo is returned by the agentserver token validation API. It covers both sandbox-scoped tokens (issued at sandbox creation) and workspace-scoped tokens (issued by cc-broker for turn workers).

For TokenType == "sandbox": SandboxID is set; Status reflects the live sandbox status and must be 'running' or 'creating'.

For TokenType == "workspace": SandboxID is empty; Status is always "active" — workspace tokens have no lifecycle gating.

type TokenUsage

type TokenUsage struct {
	ID                       string    `json:"id"`
	TraceID                  string    `json:"trace_id,omitempty"`
	SandboxID                string    `json:"sandbox_id"`
	WorkspaceID              string    `json:"workspace_id"`
	Provider                 string    `json:"provider"`
	Model                    string    `json:"model"`
	MessageID                string    `json:"message_id,omitempty"`
	InputTokens              int64     `json:"input_tokens"`
	OutputTokens             int64     `json:"output_tokens"`
	CacheCreationInputTokens int64     `json:"cache_creation_input_tokens"`
	CacheReadInputTokens     int64     `json:"cache_read_input_tokens"`
	Streaming                bool      `json:"streaming"`
	Duration                 int64     `json:"duration"`
	TTFT                     int64     `json:"ttft"`
	CreatedAt                time.Time `json:"created_at"`
}

TokenUsage records a single LLM API request's token usage.

type Trace

type Trace struct {
	ID          string    `json:"id"`
	SandboxID   string    `json:"sandbox_id"`
	WorkspaceID string    `json:"workspace_id"`
	Source      string    `json:"source"`
	CreatedAt   time.Time `json:"created_at"`
	UpdatedAt   time.Time `json:"updated_at"`
}

Trace represents a logical session/trace spanning multiple API requests.

type TraceWithStats

type TraceWithStats struct {
	Trace
	RequestCount             int64  `json:"request_count"`
	TotalInputTokens         int64  `json:"total_input_tokens"`
	TotalOutputTokens        int64  `json:"total_output_tokens"`
	TotalCacheReadTokens     int64  `json:"total_cache_read_tokens"`
	TotalCacheCreationTokens int64  `json:"total_cache_creation_tokens"`
	Models                   string `json:"models"`
}

TraceWithStats is a trace with aggregated request statistics.

type UsageSummary

type UsageSummary struct {
	Provider                 string `json:"provider"`
	Model                    string `json:"model"`
	InputTokens              int64  `json:"input_tokens"`
	OutputTokens             int64  `json:"output_tokens"`
	CacheCreationInputTokens int64  `json:"cache_creation_input_tokens"`
	CacheReadInputTokens     int64  `json:"cache_read_input_tokens"`
	RequestCount             int64  `json:"request_count"`
}

UsageSummary is an aggregated usage row grouped by provider+model.

type WorkspaceQuota added in v0.15.1

type WorkspaceQuota struct {
	WorkspaceID string    `json:"workspace_id"`
	MaxRPD      *int      `json:"max_rpd"`
	UpdatedAt   time.Time `json:"updated_at"`
}

WorkspaceQuota holds per-workspace quota overrides stored in the llmproxy DB.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL