Documentation
¶
Index ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type ChatCompletionMetrics ¶ added in v0.3.0
// ChatCompletionMetrics is the interface for the chat completion AI Gateway metrics.
type ChatCompletionMetrics interface {
// StartRequest initializes timing for a new request.
// NOTE(review): headers are presumably the incoming request headers — confirm against the implementation.
StartRequest(headers map[string]string)
// SetModel sets the model for the request. This is usually called after parsing the request body.
SetModel(model string)
// SetBackend sets the selected backend once the routing decision has been made. This is usually called
// after parsing the request body to determine the model and invoke the routing logic.
SetBackend(backend *filterapi.Backend)
// RecordTokenUsage records token usage metrics from the given input, output, and total token counts.
// extraAttrs are additional attributes passed by the caller.
// NOTE(review): requestHeaderLabelMapping presumably maps request header names to metric label
// names — confirm against the implementation.
RecordTokenUsage(ctx context.Context, inputTokens, outputTokens, totalTokens uint32, requestHeaderLabelMapping map[string]string, extraAttrs ...attribute.KeyValue)
// RecordRequestCompletion records latency metrics for the entire request.
// success reports whether the request completed successfully.
RecordRequestCompletion(ctx context.Context, success bool, requestHeaderLabelMapping map[string]string, extraAttrs ...attribute.KeyValue)
// RecordTokenLatency records latency metrics for token generation.
// NOTE(review): tokens is presumably the number of tokens generated so far — confirm against the implementation.
RecordTokenLatency(ctx context.Context, tokens uint32, requestHeaderLabelMapping map[string]string, extraAttrs ...attribute.KeyValue)
// GetTimeToFirstTokenMs returns the time to first token in stream mode, in milliseconds.
GetTimeToFirstTokenMs() float64
// GetInterTokenLatencyMs returns the inter-token latency in stream mode, in milliseconds.
GetInterTokenLatencyMs() float64
}
ChatCompletionMetrics is the interface for the chat completion AI Gateway metrics.
func NewChatCompletion ¶
func NewChatCompletion(meter metric.Meter, requestHeaderLabelMapping map[string]string) ChatCompletionMetrics
NewChatCompletion creates a new ChatCompletionMetrics instance.
type EmbeddingsMetrics ¶ added in v0.3.0
// EmbeddingsMetrics is the interface for the embeddings AI Gateway metrics.
type EmbeddingsMetrics interface {
// StartRequest initializes timing for a new request.
// NOTE(review): headers are presumably the incoming request headers — confirm against the implementation.
StartRequest(headers map[string]string)
// SetModel sets the model for the request. This is usually called after parsing the request body.
SetModel(model string)
// SetBackend sets the selected backend once the routing decision has been made. This is usually called
// after parsing the request body to determine the model and invoke the routing logic.
SetBackend(backend *filterapi.Backend)
// RecordTokenUsage records token usage metrics for embeddings (only input and total tokens are relevant).
// extraAttrs are additional attributes passed by the caller.
// NOTE(review): requestHeaderLabelMapping presumably maps request header names to metric label
// names — confirm against the implementation.
RecordTokenUsage(ctx context.Context, inputTokens, totalTokens uint32, requestHeaderLabelMapping map[string]string, extraAttrs ...attribute.KeyValue)
// RecordRequestCompletion records latency metrics for the entire request.
// success reports whether the request completed successfully.
RecordRequestCompletion(ctx context.Context, success bool, requestHeaderLabelMapping map[string]string, extraAttrs ...attribute.KeyValue)
}
EmbeddingsMetrics is the interface for the embeddings AI Gateway metrics.
func NewEmbeddings ¶ added in v0.3.0
func NewEmbeddings(meter metric.Meter, requestHeaderLabelMapping map[string]string) EmbeddingsMetrics
NewEmbeddings creates a new EmbeddingsMetrics instance.
Click to show internal directories.
Click to hide internal directories.