llmproxy

package
v0.47.0 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: May 3, 2026 License: MIT Imports: 23 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func GenerateGeminiRequestID added in v0.29.0

func GenerateGeminiRequestID() string

GenerateGeminiRequestID creates a new request ID with the "gr-" prefix.

func GenerateGeminiTraceID added in v0.29.0

func GenerateGeminiTraceID() string

GenerateGeminiTraceID creates a new trace ID with the "gt-" prefix.

func GenerateRequestID

func GenerateRequestID() string

GenerateRequestID creates a new request ID with the "ar-" prefix.

func GenerateTraceID

func GenerateTraceID() string

GenerateTraceID creates a new trace ID with the "at-" prefix.

func NewLogger

func NewLogger(level slog.Level) *slog.Logger

NewLogger creates a structured JSON logger for the LLM proxy.

func ParseNonStreamingResponse

func ParseNonStreamingResponse(body []byte) (model, msgID string, u anthropic.Usage, err error)

ParseNonStreamingResponse parses a complete JSON response from the Anthropic messages API.

func ParseStreamEvent

func ParseStreamEvent(data []byte) (eventType, model, msgID string, u anthropic.Usage, hasUsage bool)

ParseStreamEvent parses a single SSE data payload from an Anthropic streaming response. Returns the event type, model, message ID, usage, and whether usage data was found.

Types

type Config

type Config struct {
	ListenAddr         string // HTTP listen address, e.g. ":8081"
	DatabaseURL        string // proxy's own PostgreSQL connection URL
	AgentserverURL     string // agentserver internal API URL for token validation
	AnthropicBaseURL   string // upstream Anthropic API URL
	AnthropicAPIKey    string // real Anthropic API key
	AnthropicAuthToken string // alternative: Bearer token auth
	GeminiBaseURL      string // upstream Gemini API URL
	GeminiAPIKey       string // real Google API key for Gemini
	TraceHeader        string // custom trace header name
	DefaultMaxRPD      int    // default max requests per day per workspace (0 = unlimited)
}

Config holds all configuration for the LLM proxy.

func LoadConfigFromEnv

func LoadConfigFromEnv() Config

LoadConfigFromEnv reads configuration from environment variables.

type GeminiUsageMetadata added in v0.29.0

type GeminiUsageMetadata struct {
	PromptTokenCount        int64 `json:"promptTokenCount,omitempty"`
	CandidatesTokenCount    int64 `json:"candidatesTokenCount,omitempty"`
	CachedContentTokenCount int64 `json:"cachedContentTokenCount,omitempty"`
	TotalTokenCount         int64 `json:"totalTokenCount,omitempty"`
	ThoughtsTokenCount      int64 `json:"thoughtsTokenCount,omitempty"`
}

GeminiUsageMetadata holds token counts from a Gemini API response.

func ParseGeminiNonStreamingResponse added in v0.29.0

func ParseGeminiNonStreamingResponse(body []byte) (model string, usage GeminiUsageMetadata, err error)

ParseGeminiNonStreamingResponse parses a complete JSON response from Gemini generateContent.

func ParseGeminiStreamChunk added in v0.29.0

func ParseGeminiStreamChunk(data []byte) (model string, usage GeminiUsageMetadata, hasUsage bool, hasParts bool)

ParseGeminiStreamChunk parses a single SSE data payload from a Gemini streaming response. Returns model, usage, whether usage was present, and whether content parts were present.

type QueryOpts

type QueryOpts struct {
	WorkspaceID string
	SandboxID   string
	Since       time.Time
	Limit       int
	Offset      int
}

QueryOpts filters for usage/trace queries.

type Server

type Server struct {
	// contains filtered or unexported fields
}

Server is the LLM proxy HTTP server.

func NewServer

func NewServer(cfg Config, store *Store, logger *slog.Logger) *Server

NewServer creates a new LLM proxy server.

func (*Server) ExtractGeminiTraceID added in v0.29.0

func (s *Server) ExtractGeminiTraceID(r *http.Request, body []byte) (string, string)

ExtractGeminiTraceID extracts a trace ID from the request for Gemini. Same priority as ExtractTraceID but uses gt- prefix for auto-generated IDs.

func (*Server) ExtractTraceID

func (s *Server) ExtractTraceID(r *http.Request, body []byte) (string, string)

ExtractTraceID extracts a trace ID from the request. Priority: custom header → OpenCode/Claude Code session header → auto-generate. Returns (traceID, source).

func (*Server) Routes

func (s *Server) Routes() http.Handler

Routes returns the HTTP handler with all routes configured.

func (*Server) ValidateProxyToken

func (s *Server) ValidateProxyToken(ctx context.Context, proxyToken string) (*TokenInfo, error)

ValidateProxyToken calls the agentserver internal API to validate a proxy token (sandbox- or workspace-scoped). Returns nil (not error) if the token is invalid.

type Store

type Store struct {
	// contains filtered or unexported fields
}

Store wraps a *sql.DB for the LLM proxy's own database.

func NewStore

func NewStore(databaseURL string) (*Store, error)

NewStore connects to the proxy database and runs migrations.

func (*Store) Close

func (s *Store) Close() error

Close closes the underlying database connection.

func (*Store) CountTodayRequests added in v0.15.1

func (s *Store) CountTodayRequests(workspaceID string) (int64, error)

CountTodayRequests returns the number of LLM API requests for a workspace since the start of today (UTC).

func (*Store) DeleteWorkspaceQuota added in v0.15.1

func (s *Store) DeleteWorkspaceQuota(workspaceID string) error

DeleteWorkspaceQuota removes the quota override for a workspace.

func (*Store) GetOrCreateTrace

func (s *Store) GetOrCreateTrace(traceID, sandboxID, workspaceID, source string) (*Trace, error)

GetOrCreateTrace returns an existing trace or creates a new one. Uses INSERT ... ON CONFLICT to avoid TOCTOU races under concurrent requests.

func (*Store) GetTraceDetail

func (s *Store) GetTraceDetail(traceID string) (*Trace, []TokenUsage, error)

GetTraceDetail returns a trace and all its usage records.

func (*Store) GetWorkspaceQuota added in v0.15.1

func (s *Store) GetWorkspaceQuota(workspaceID string) (*WorkspaceQuota, error)

GetWorkspaceQuota returns the quota override for a workspace, or nil if none exists.

func (*Store) QueryTraces

func (s *Store) QueryTraces(opts QueryOpts) ([]TraceWithStats, int64, error)

QueryTraces returns traces with aggregated statistics and total count.

func (*Store) QueryUsage

func (s *Store) QueryUsage(opts QueryOpts) ([]UsageSummary, error)

QueryUsage returns aggregated usage grouped by provider and model.

func (*Store) RecordUsage

func (s *Store) RecordUsage(u TokenUsage) error

RecordUsage inserts a single API request usage record.

func (*Store) SetWorkspaceQuota added in v0.15.1

func (s *Store) SetWorkspaceQuota(workspaceID string, maxRPD *int) error

SetWorkspaceQuota upserts the quota override for a workspace.

func (*Store) UpdateTraceActivity

func (s *Store) UpdateTraceActivity(traceID string) error

UpdateTraceActivity updates the updated_at timestamp on a trace.

type TokenInfo added in v0.47.0

type TokenInfo struct {
	TokenType              string `json:"token_type"`
	SandboxID              string `json:"sandbox_id,omitempty"`
	WorkspaceID            string `json:"workspace_id"`
	Status                 string `json:"status"`
	ModelserverUpstreamURL string `json:"modelserver_upstream_url,omitempty"`
}

TokenInfo is returned by the agentserver token validation API. It covers both sandbox-scoped tokens (issued at sandbox creation) and workspace-scoped tokens (issued by cc-broker for turn workers).

For TokenType == "sandbox": SandboxID is set; Status reflects the live sandbox status and must be 'running' or 'creating'.

For TokenType == "workspace": SandboxID is empty; Status is always "active" — workspace tokens have no lifecycle gating.

type TokenUsage

type TokenUsage struct {
	ID                       string    `json:"id"`
	TraceID                  string    `json:"trace_id,omitempty"`
	SandboxID                string    `json:"sandbox_id"`
	WorkspaceID              string    `json:"workspace_id"`
	Provider                 string    `json:"provider"`
	Model                    string    `json:"model"`
	MessageID                string    `json:"message_id,omitempty"`
	InputTokens              int64     `json:"input_tokens"`
	OutputTokens             int64     `json:"output_tokens"`
	CacheCreationInputTokens int64     `json:"cache_creation_input_tokens"`
	CacheReadInputTokens     int64     `json:"cache_read_input_tokens"`
	Streaming                bool      `json:"streaming"`
	Duration                 int64     `json:"duration"`
	TTFT                     int64     `json:"ttft"`
	CreatedAt                time.Time `json:"created_at"`
}

TokenUsage records a single LLM API request's token usage.

type Trace

type Trace struct {
	ID          string    `json:"id"`
	SandboxID   string    `json:"sandbox_id"`
	WorkspaceID string    `json:"workspace_id"`
	Source      string    `json:"source"`
	CreatedAt   time.Time `json:"created_at"`
	UpdatedAt   time.Time `json:"updated_at"`
}

Trace represents a logical session/trace spanning multiple API requests.

type TraceWithStats

type TraceWithStats struct {
	Trace
	RequestCount             int64  `json:"request_count"`
	TotalInputTokens         int64  `json:"total_input_tokens"`
	TotalOutputTokens        int64  `json:"total_output_tokens"`
	TotalCacheReadTokens     int64  `json:"total_cache_read_tokens"`
	TotalCacheCreationTokens int64  `json:"total_cache_creation_tokens"`
	Models                   string `json:"models"`
}

TraceWithStats is a trace with aggregated request statistics.

type UsageSummary

type UsageSummary struct {
	Provider                 string `json:"provider"`
	Model                    string `json:"model"`
	InputTokens              int64  `json:"input_tokens"`
	OutputTokens             int64  `json:"output_tokens"`
	CacheCreationInputTokens int64  `json:"cache_creation_input_tokens"`
	CacheReadInputTokens     int64  `json:"cache_read_input_tokens"`
	RequestCount             int64  `json:"request_count"`
}

UsageSummary is an aggregated usage row grouped by provider+model.

type WorkspaceQuota added in v0.15.1

type WorkspaceQuota struct {
	WorkspaceID string    `json:"workspace_id"`
	MaxRPD      *int      `json:"max_rpd"`
	UpdatedAt   time.Time `json:"updated_at"`
}

WorkspaceQuota holds per-workspace quota overrides stored in the llmproxy DB.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL