Documentation
¶
Index ¶
- Constants
- func DefaultOrgIDExtractor(ctx context.Context, req *http.Request, meta llmproxy.BodyMetadata) string
- func DeriveCacheKeyFromPrefix(meta llmproxy.BodyMetadata, rawBody []byte) string
- type AddHeaderInterceptor
- type BillingInterceptor
- type CacheKeyExtractor
- type CacheKeyFunc
- type CacheRetention
- type Header
- type HeaderBanInterceptor
- type LoggingInterceptor
- type Metrics
- type MetricsInterceptor
- type OrgIDExtractor
- type PromptCachingConfig
- type PromptCachingInterceptor
- func NewAnthropicPromptCaching(retention CacheRetention) *PromptCachingInterceptor
- func NewAnthropicPromptCachingWithResult(retention CacheRetention, onResult func(llmproxy.CacheUsage)) *PromptCachingInterceptor
- func NewBedrockPromptCaching(retention CacheRetention) *PromptCachingInterceptor
- func NewBedrockPromptCachingWithResult(retention CacheRetention, onResult func(llmproxy.CacheUsage)) *PromptCachingInterceptor
- func NewFireworksPromptCaching(sessionID string) *PromptCachingInterceptor
- func NewFireworksPromptCachingAuto() *PromptCachingInterceptor
- func NewFireworksPromptCachingAutoWithResult(onResult func(llmproxy.CacheUsage)) *PromptCachingInterceptor
- func NewFireworksPromptCachingWithExtractor(extractor CacheKeyExtractor) *PromptCachingInterceptor
- func NewFireworksPromptCachingWithExtractorAndResult(extractor CacheKeyExtractor, onResult func(llmproxy.CacheUsage)) *PromptCachingInterceptor
- func NewFireworksPromptCachingWithOrgExtractor(sessionID string, orgExtractor OrgIDExtractor) *PromptCachingInterceptor
- func NewFireworksPromptCachingWithResult(sessionID string, onResult func(llmproxy.CacheUsage)) *PromptCachingInterceptor
- func NewFireworksPromptCachingWithTraceID(traceExtractor TraceExtractor) *PromptCachingInterceptor
- func NewFireworksPromptCachingWithTraceIDAndResult(traceExtractor TraceExtractor, onResult func(llmproxy.CacheUsage)) *PromptCachingInterceptor
- func NewOpenAIPromptCaching(retention CacheRetention, cacheKey string) *PromptCachingInterceptor
- func NewOpenAIPromptCachingAuto(namespace string, retention CacheRetention) *PromptCachingInterceptor
- func NewOpenAIPromptCachingAutoWithOrgExtractor(retention CacheRetention, orgExtractor OrgIDExtractor) *PromptCachingInterceptor
- func NewOpenAIPromptCachingAutoWithResult(namespace string, retention CacheRetention, onResult func(llmproxy.CacheUsage)) *PromptCachingInterceptor
- func NewOpenAIPromptCachingWithNamespace(namespace string, retention CacheRetention, cacheKey string) *PromptCachingInterceptor
- func NewOpenAIPromptCachingWithOrgExtractor(retention CacheRetention, cacheKey string, orgExtractor OrgIDExtractor) *PromptCachingInterceptor
- func NewOpenAIPromptCachingWithResult(retention CacheRetention, cacheKey string, onResult func(llmproxy.CacheUsage)) *PromptCachingInterceptor
- func NewPromptCaching(provider string, config PromptCachingConfig) *PromptCachingInterceptor
- func NewPromptCachingWithResult(provider string, config PromptCachingConfig, ...) *PromptCachingInterceptor
- func NewXAIPromptCaching(convID string) *PromptCachingInterceptor
- func NewXAIPromptCachingAuto() *PromptCachingInterceptor
- func NewXAIPromptCachingAutoWithResult(onResult func(llmproxy.CacheUsage)) *PromptCachingInterceptor
- func NewXAIPromptCachingWithExtractor(extractor CacheKeyExtractor) *PromptCachingInterceptor
- func NewXAIPromptCachingWithExtractorAndResult(extractor CacheKeyExtractor, onResult func(llmproxy.CacheUsage)) *PromptCachingInterceptor
- func NewXAIPromptCachingWithResult(convID string, onResult func(llmproxy.CacheUsage)) *PromptCachingInterceptor
- func NewXAIPromptCachingWithTraceID(traceExtractor TraceExtractor) *PromptCachingInterceptor
- func NewXAIPromptCachingWithTraceIDAndResult(traceExtractor TraceExtractor, onResult func(llmproxy.CacheUsage)) *PromptCachingInterceptor
- type RetryInterceptor
- type TraceExtractor
- type TraceInfo
- type TracingInterceptor
Constants ¶
const (
	HeaderCacheKey                      = "X-Cache-Key"
	HeaderOrgID                         = "X-Org-ID"
	HeaderFireworksSessionAffinity      = "X-Session-Affinity"
	HeaderFireworksPromptCacheIsolation = "X-Prompt-Cache-Isolation-Key"
)
Variables ¶
This section is empty.
Functions ¶
func DefaultOrgIDExtractor ¶ added in v0.0.3
func DefaultOrgIDExtractor(ctx context.Context, req *http.Request, meta llmproxy.BodyMetadata) string
func DeriveCacheKeyFromPrefix ¶ added in v0.0.3
func DeriveCacheKeyFromPrefix(meta llmproxy.BodyMetadata, rawBody []byte) string
Types ¶
type AddHeaderInterceptor ¶
func NewAddHeader ¶
func NewAddHeader(requestHeaders, responseHeaders []Header) *AddHeaderInterceptor
func NewAddRequestHeader ¶
func NewAddRequestHeader(headers ...Header) *AddHeaderInterceptor
func NewAddResponseHeader ¶
func NewAddResponseHeader(headers ...Header) *AddHeaderInterceptor
func (*AddHeaderInterceptor) Intercept ¶
func (i *AddHeaderInterceptor) Intercept(req *http.Request, meta llmproxy.BodyMetadata, rawBody []byte, next llmproxy.RoundTripFunc) (*http.Response, llmproxy.ResponseMetadata, []byte, error)
type BillingInterceptor ¶
type BillingInterceptor struct {
// Lookup is the function that returns pricing for a provider/model.
Lookup llmproxy.CostLookup
// OnResult is called with the billing result after each successful request.
// This can be used to log, record to a database, or aggregate metrics.
OnResult func(llmproxy.BillingResult)
}
BillingInterceptor calculates and records the cost of each request. It uses a CostLookup function to determine pricing for each model.
func NewBilling ¶
func NewBilling(lookup llmproxy.CostLookup, onResult func(llmproxy.BillingResult)) *BillingInterceptor
NewBilling creates a new billing interceptor with the given lookup function.
Example:
lookup := func(provider, model string) (llmproxy.CostInfo, bool) {
// Your pricing database lookup
if model == "gpt-4" {
return llmproxy.CostInfo{Input: 30, Output: 60}, true
}
return llmproxy.CostInfo{}, false
}
billing := interceptors.NewBilling(lookup, func(r llmproxy.BillingResult) {
log.Printf("Cost: $%.6f for %s", r.TotalCost, r.Model)
})
func (*BillingInterceptor) Intercept ¶
func (i *BillingInterceptor) Intercept(req *http.Request, meta llmproxy.BodyMetadata, rawBody []byte, next llmproxy.RoundTripFunc) (*http.Response, llmproxy.ResponseMetadata, []byte, error)
Intercept calculates the cost after a successful request and calls OnResult. If the model is not found in the lookup, no billing is recorded.
type CacheKeyExtractor ¶ added in v0.0.3
type CacheKeyExtractor func(ctx context.Context, req *http.Request, meta llmproxy.BodyMetadata, rawBody []byte) string
func TraceIDCacheKeyExtractor ¶ added in v0.0.3
func TraceIDCacheKeyExtractor(traceExtractor TraceExtractor) CacheKeyExtractor
type CacheKeyFunc ¶ added in v0.0.3
type CacheKeyFunc func(meta llmproxy.BodyMetadata, rawBody []byte) string
type CacheRetention ¶ added in v0.0.3
type CacheRetention string
const (
	CacheRetentionDefault CacheRetention = ""
	CacheRetention1h      CacheRetention = "1h"
	CacheRetention24h     CacheRetention = "24h"
)
type HeaderBanInterceptor ¶
func NewHeaderBan ¶
func NewHeaderBan(requestHeaders, responseHeaders []string) *HeaderBanInterceptor
func NewRequestHeaderBan ¶
func NewRequestHeaderBan(headers ...string) *HeaderBanInterceptor
func NewResponseHeaderBan ¶
func NewResponseHeaderBan(headers ...string) *HeaderBanInterceptor
func (*HeaderBanInterceptor) Intercept ¶
func (i *HeaderBanInterceptor) Intercept(req *http.Request, meta llmproxy.BodyMetadata, rawBody []byte, next llmproxy.RoundTripFunc) (*http.Response, llmproxy.ResponseMetadata, []byte, error)
type LoggingInterceptor ¶
type LoggingInterceptor struct {
// Logger is the destination for log output.
// If nil, a default logger is used.
Logger llmproxy.Logger
}
LoggingInterceptor logs request and response details. It records the model, method, URL, latency, and token usage.
func NewLogging ¶
func NewLogging(logger llmproxy.Logger) *LoggingInterceptor
NewLogging creates a new logging interceptor with the given logger. Pass nil to use a default logger that wraps log.Default().
func (*LoggingInterceptor) Intercept ¶
func (i *LoggingInterceptor) Intercept(req *http.Request, meta llmproxy.BodyMetadata, rawBody []byte, next llmproxy.RoundTripFunc) (*http.Response, llmproxy.ResponseMetadata, []byte, error)
Intercept logs the request before execution and the response after. Log format:
- Request: [model] METHOD /path
- Success: [model] OK: tokens=prompt/completion (duration)
- Error: [model] ERROR: err (duration)
type Metrics ¶
type Metrics struct {
// TotalRequests is the total number of requests processed.
TotalRequests int64
// TotalTokens is the sum of all tokens consumed.
TotalTokens int64
// TotalPromptTokens is the sum of all prompt tokens consumed.
TotalPromptTokens int64
// TotalCompletionTokens is the sum of all completion tokens generated.
TotalCompletionTokens int64
// TotalLatency is the cumulative latency in nanoseconds.
TotalLatency int64
// Errors is the count of failed requests.
Errors int64
}
Metrics holds aggregated statistics about proxied requests. All fields are safe for concurrent access via atomic operations.
type MetricsInterceptor ¶
type MetricsInterceptor struct {
// Metrics is the destination for collected metrics.
Metrics *Metrics
}
MetricsInterceptor collects metrics about proxied requests. It tracks request counts, token usage, latency, and errors.
func NewMetrics ¶
func NewMetrics(m *Metrics) *MetricsInterceptor
NewMetrics creates a new metrics interceptor that records to the given Metrics struct. The Metrics struct should be created once and shared across all requests.
Example:
m := &interceptors.Metrics{}
proxy := llmproxy.NewProxy(provider,
llmproxy.WithInterceptor(interceptors.NewMetrics(m)),
)
// Later, read m.TotalRequests, etc.
func (*MetricsInterceptor) Intercept ¶
func (i *MetricsInterceptor) Intercept(req *http.Request, meta llmproxy.BodyMetadata, rawBody []byte, next llmproxy.RoundTripFunc) (*http.Response, llmproxy.ResponseMetadata, []byte, error)
Intercept increments metrics counters and measures latency. It records:
- TotalRequests (always)
- TotalLatency (always)
- Errors (on failure)
- Token counts (on success)
type OrgIDExtractor ¶ added in v0.0.3
type PromptCachingConfig ¶ added in v0.0.3
type PromptCachingConfig struct {
Enabled bool
Retention CacheRetention
CacheKey string
Namespace string
CacheKeyFn CacheKeyFunc
CacheKeyExtractor CacheKeyExtractor
OrgIDExtractor OrgIDExtractor
}
type PromptCachingInterceptor ¶ added in v0.0.3
type PromptCachingInterceptor struct {
// contains filtered or unexported fields
}
func NewAnthropicPromptCaching ¶ added in v0.0.3
func NewAnthropicPromptCaching(retention CacheRetention) *PromptCachingInterceptor
func NewAnthropicPromptCachingWithResult ¶ added in v0.0.3
func NewAnthropicPromptCachingWithResult(retention CacheRetention, onResult func(llmproxy.CacheUsage)) *PromptCachingInterceptor
func NewBedrockPromptCaching ¶ added in v0.0.3
func NewBedrockPromptCaching(retention CacheRetention) *PromptCachingInterceptor
func NewBedrockPromptCachingWithResult ¶ added in v0.0.3
func NewBedrockPromptCachingWithResult(retention CacheRetention, onResult func(llmproxy.CacheUsage)) *PromptCachingInterceptor
func NewFireworksPromptCaching ¶ added in v0.0.3
func NewFireworksPromptCaching(sessionID string) *PromptCachingInterceptor
func NewFireworksPromptCachingAuto ¶ added in v0.0.3
func NewFireworksPromptCachingAuto() *PromptCachingInterceptor
func NewFireworksPromptCachingAutoWithResult ¶ added in v0.0.3
func NewFireworksPromptCachingAutoWithResult(onResult func(llmproxy.CacheUsage)) *PromptCachingInterceptor
func NewFireworksPromptCachingWithExtractor ¶ added in v0.0.3
func NewFireworksPromptCachingWithExtractor(extractor CacheKeyExtractor) *PromptCachingInterceptor
func NewFireworksPromptCachingWithExtractorAndResult ¶ added in v0.0.3
func NewFireworksPromptCachingWithExtractorAndResult(extractor CacheKeyExtractor, onResult func(llmproxy.CacheUsage)) *PromptCachingInterceptor
func NewFireworksPromptCachingWithOrgExtractor ¶ added in v0.0.3
func NewFireworksPromptCachingWithOrgExtractor(sessionID string, orgExtractor OrgIDExtractor) *PromptCachingInterceptor
func NewFireworksPromptCachingWithResult ¶ added in v0.0.3
func NewFireworksPromptCachingWithResult(sessionID string, onResult func(llmproxy.CacheUsage)) *PromptCachingInterceptor
func NewFireworksPromptCachingWithTraceID ¶ added in v0.0.3
func NewFireworksPromptCachingWithTraceID(traceExtractor TraceExtractor) *PromptCachingInterceptor
func NewFireworksPromptCachingWithTraceIDAndResult ¶ added in v0.0.3
func NewFireworksPromptCachingWithTraceIDAndResult(traceExtractor TraceExtractor, onResult func(llmproxy.CacheUsage)) *PromptCachingInterceptor
func NewOpenAIPromptCaching ¶ added in v0.0.3
func NewOpenAIPromptCaching(retention CacheRetention, cacheKey string) *PromptCachingInterceptor
func NewOpenAIPromptCachingAuto ¶ added in v0.0.3
func NewOpenAIPromptCachingAuto(namespace string, retention CacheRetention) *PromptCachingInterceptor
func NewOpenAIPromptCachingAutoWithOrgExtractor ¶ added in v0.0.3
func NewOpenAIPromptCachingAutoWithOrgExtractor(retention CacheRetention, orgExtractor OrgIDExtractor) *PromptCachingInterceptor
func NewOpenAIPromptCachingAutoWithResult ¶ added in v0.0.3
func NewOpenAIPromptCachingAutoWithResult(namespace string, retention CacheRetention, onResult func(llmproxy.CacheUsage)) *PromptCachingInterceptor
func NewOpenAIPromptCachingWithNamespace ¶ added in v0.0.3
func NewOpenAIPromptCachingWithNamespace(namespace string, retention CacheRetention, cacheKey string) *PromptCachingInterceptor
func NewOpenAIPromptCachingWithOrgExtractor ¶ added in v0.0.3
func NewOpenAIPromptCachingWithOrgExtractor(retention CacheRetention, cacheKey string, orgExtractor OrgIDExtractor) *PromptCachingInterceptor
func NewOpenAIPromptCachingWithResult ¶ added in v0.0.3
func NewOpenAIPromptCachingWithResult(retention CacheRetention, cacheKey string, onResult func(llmproxy.CacheUsage)) *PromptCachingInterceptor
func NewPromptCaching ¶ added in v0.0.3
func NewPromptCaching(provider string, config PromptCachingConfig) *PromptCachingInterceptor
func NewPromptCachingWithResult ¶ added in v0.0.3
func NewPromptCachingWithResult(provider string, config PromptCachingConfig, onResult func(llmproxy.CacheUsage)) *PromptCachingInterceptor
func NewXAIPromptCaching ¶ added in v0.0.3
func NewXAIPromptCaching(convID string) *PromptCachingInterceptor
func NewXAIPromptCachingAuto ¶ added in v0.0.3
func NewXAIPromptCachingAuto() *PromptCachingInterceptor
func NewXAIPromptCachingAutoWithResult ¶ added in v0.0.3
func NewXAIPromptCachingAutoWithResult(onResult func(llmproxy.CacheUsage)) *PromptCachingInterceptor
func NewXAIPromptCachingWithExtractor ¶ added in v0.0.3
func NewXAIPromptCachingWithExtractor(extractor CacheKeyExtractor) *PromptCachingInterceptor
func NewXAIPromptCachingWithExtractorAndResult ¶ added in v0.0.3
func NewXAIPromptCachingWithExtractorAndResult(extractor CacheKeyExtractor, onResult func(llmproxy.CacheUsage)) *PromptCachingInterceptor
func NewXAIPromptCachingWithResult ¶ added in v0.0.3
func NewXAIPromptCachingWithResult(convID string, onResult func(llmproxy.CacheUsage)) *PromptCachingInterceptor
func NewXAIPromptCachingWithTraceID ¶ added in v0.0.3
func NewXAIPromptCachingWithTraceID(traceExtractor TraceExtractor) *PromptCachingInterceptor
func NewXAIPromptCachingWithTraceIDAndResult ¶ added in v0.0.3
func NewXAIPromptCachingWithTraceIDAndResult(traceExtractor TraceExtractor, onResult func(llmproxy.CacheUsage)) *PromptCachingInterceptor
func (*PromptCachingInterceptor) Intercept ¶ added in v0.0.3
func (i *PromptCachingInterceptor) Intercept(req *http.Request, meta llmproxy.BodyMetadata, rawBody []byte, next llmproxy.RoundTripFunc) (*http.Response, llmproxy.ResponseMetadata, []byte, error)
type RetryInterceptor ¶
type RetryInterceptor struct {
MaxAttempts int
Delay time.Duration
IsRetryable func(*http.Response, error) bool
UseRateLimitHeaders bool
}
func NewRetryWithPredicate ¶
func NewRetryWithRateLimitHeaders ¶
func NewRetryWithRateLimitHeaders(maxAttempts int, defaultDelay time.Duration) *RetryInterceptor
func (*RetryInterceptor) Intercept ¶
func (i *RetryInterceptor) Intercept(req *http.Request, meta llmproxy.BodyMetadata, rawBody []byte, next llmproxy.RoundTripFunc) (*http.Response, llmproxy.ResponseMetadata, []byte, error)
type TraceExtractor ¶
TraceExtractor extracts trace information from a request context. Return empty TraceInfo if no trace context is available.
type TraceInfo ¶
type TraceInfo struct {
// TraceID is the 16-byte trace identifier (32 hex chars).
TraceID [16]byte
// SpanID is the 8-byte span identifier (16 hex chars).
SpanID [8]byte
// Sampled indicates whether the trace is sampled.
Sampled bool
}
TraceInfo holds OpenTelemetry trace context information.
type TracingInterceptor ¶
type TracingInterceptor struct {
// Extract extracts trace info from the incoming request context.
// If nil, no trace headers are added.
Extract TraceExtractor
// ResponseHeader is the header name for the trace ID in the response.
// Defaults to "X-Request-ID" if empty.
ResponseHeader string
}
TracingInterceptor adds OpenTelemetry trace headers to upstream requests and propagates the trace ID back as a response header for correlation.
func NewTracing ¶
func NewTracing(extractor TraceExtractor) *TracingInterceptor
NewTracing creates a tracing interceptor with the given trace extractor.
The extractor function should pull trace context from the incoming request and return TraceInfo. For OpenTelemetry, you can use:
func otelExtractor(ctx context.Context) interceptors.TraceInfo {
span := trace.SpanFromContext(ctx)
if !span.SpanContext().IsValid() {
return interceptors.TraceInfo{}
}
return interceptors.TraceInfo{
TraceID: span.SpanContext().TraceID(),
SpanID: span.SpanContext().SpanID(),
Sampled: span.SpanContext().IsSampled(),
}
}
Example:
tracing := interceptors.NewTracing(otelExtractor)
proxy := llmproxy.NewProxy(provider, llmproxy.WithInterceptor(tracing))
func NewTracingWithHeader ¶
func NewTracingWithHeader(extractor TraceExtractor, responseHeader string) *TracingInterceptor
NewTracingWithHeader creates a tracing interceptor with a custom response header name.
func (*TracingInterceptor) Intercept ¶
func (i *TracingInterceptor) Intercept(req *http.Request, meta llmproxy.BodyMetadata, rawBody []byte, next llmproxy.RoundTripFunc) (*http.Response, llmproxy.ResponseMetadata, []byte, error)
Intercept adds trace headers to the upstream request and sets the response header.
Upstream headers set:
- X-Request-ID: the trace ID (32 hex chars)
- traceparent: W3C Trace Context format (version-traceid-spanid-flags)
Response header set:
- X-Request-ID (or custom ResponseHeader): the trace ID for correlation