Documentation ¶
Overview ¶
Copyright 2026 Teradata
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
Copyright 2026 Teradata ¶
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
Index ¶
- type InstrumentedProvider
- func (p *InstrumentedProvider) Chat(ctx context.Context, messages []llmtypes.Message, tools []shuttle.Tool) (*llmtypes.LLMResponse, error)
- func (p *InstrumentedProvider) ChatStream(ctx context.Context, messages []llmtypes.Message, tools []shuttle.Tool, ...) (*llmtypes.LLMResponse, error)
- func (p *InstrumentedProvider) Model() string
- func (p *InstrumentedProvider) Name() string
- type RateLimiter
- func (rl *RateLimiter) Close() error
- func (rl *RateLimiter) Do(ctx context.Context, call func(context.Context) (interface{}, error)) (interface{}, error)
- func (rl *RateLimiter) GetMetrics() RateLimiterMetrics
- func (rl *RateLimiter) GetTokenUsageLastMinute() int64
- func (rl *RateLimiter) RecordTokenUsage(tokens int64)
- type RateLimiterConfig
- type RateLimiterMetrics
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type InstrumentedProvider ¶
type InstrumentedProvider struct {
// contains filtered or unexported fields
}
InstrumentedProvider wraps any LLMProvider with observability instrumentation. It captures detailed traces and metrics for every LLM call, including:
- Request/response details (messages, tool calls)
- Token usage and cost tracking
- Latency measurements
- Error tracking
This wrapper is transparent and can wrap any LLMProvider implementation.
func NewInstrumentedProvider ¶
func NewInstrumentedProvider(provider llmtypes.LLMProvider, tracer observability.Tracer) *InstrumentedProvider
NewInstrumentedProvider creates a new instrumented LLM provider.
func (*InstrumentedProvider) Chat ¶
func (p *InstrumentedProvider) Chat(ctx context.Context, messages []llmtypes.Message, tools []shuttle.Tool) (*llmtypes.LLMResponse, error)
Chat sends a conversation to the LLM and captures detailed observability data.
func (*InstrumentedProvider) ChatStream ¶
func (p *InstrumentedProvider) ChatStream(ctx context.Context, messages []llmtypes.Message, tools []shuttle.Tool, tokenCallback llmtypes.TokenCallback) (*llmtypes.LLMResponse, error)
ChatStream streams tokens from the LLM as they are generated, with full observability. It returns an error if the underlying provider doesn't support streaming.
func (*InstrumentedProvider) Model ¶
func (p *InstrumentedProvider) Model() string
Model returns the underlying model identifier.
func (*InstrumentedProvider) Name ¶
func (p *InstrumentedProvider) Name() string
Name returns the underlying provider name.
type RateLimiter ¶
type RateLimiter struct {
// contains filtered or unexported fields
}
RateLimiter implements token bucket rate limiting for LLM requests.
func NewRateLimiter ¶
func NewRateLimiter(config RateLimiterConfig) *RateLimiter
NewRateLimiter creates a new rate limiter.
func (*RateLimiter) Close ¶
func (rl *RateLimiter) Close() error
Close stops the rate limiter and waits for pending requests.
func (*RateLimiter) Do ¶
func (rl *RateLimiter) Do(ctx context.Context, call func(context.Context) (interface{}, error)) (interface{}, error)
Do executes a function call with rate limiting and automatic retry on throttling.
func (*RateLimiter) GetMetrics ¶
func (rl *RateLimiter) GetMetrics() RateLimiterMetrics
GetMetrics returns current rate limiter metrics.
func (*RateLimiter) GetTokenUsageLastMinute ¶
func (rl *RateLimiter) GetTokenUsageLastMinute() int64
GetTokenUsageLastMinute returns token consumption in the last minute.
func (*RateLimiter) RecordTokenUsage ¶
func (rl *RateLimiter) RecordTokenUsage(tokens int64)
RecordTokenUsage records token consumption for rate limiting.
type RateLimiterConfig ¶
type RateLimiterConfig struct {
// Enabled enables rate limiting (default: true for production)
Enabled bool
// RequestsPerSecond is the maximum requests allowed per second across all agents.
// Default: 5 (conservative for AWS Bedrock)
RequestsPerSecond float64
// TokensPerMinute is the maximum tokens allowed per minute (for token-based rate limiting).
// Default: 100000 (AWS Bedrock typical limit)
TokensPerMinute int64
// BurstCapacity is the maximum burst of requests allowed.
// Default: 10 (allows brief bursts)
BurstCapacity int
// MinDelay is the minimum delay between requests (overrides RequestsPerSecond if larger).
// Default: 200ms
MinDelay time.Duration
// MaxRetries is the maximum number of retries for 429 throttling errors.
// Default: 5
MaxRetries int
// RetryBackoff is the initial backoff duration for retries (doubles each retry).
// Default: 1s
RetryBackoff time.Duration
// QueueTimeout is the maximum time a request can wait in the queue.
// Default: 5 minutes
QueueTimeout time.Duration
// Logger for rate limiter events
Logger *zap.Logger
}
RateLimiterConfig configures the LLM rate limiter.
func DefaultRateLimiterConfig ¶
func DefaultRateLimiterConfig() RateLimiterConfig
DefaultRateLimiterConfig returns conservative defaults for AWS Bedrock.