Documentation
¶
Index ¶
- Constants
- func GetEncodingForModel(model string) string
- func IsClaudeModel(model string) bool
- func SafeGetDefaultEncoding(t Tokenizer) string
- func SupportedEncodings() []string
- func SupportedModels() []string
- type DefaultTokenizer
- func (t *DefaultTokenizer) CountTokens(text string) (int, error)
- func (t *DefaultTokenizer) CountTokensForEncoding(text string, encoding string) (int, error)
- func (t *DefaultTokenizer) CountTokensForModel(text string, model string) (int, error)
- func (t *DefaultTokenizer) CountTokensInJSON(data interface{}) (int, error)
- func (t *DefaultTokenizer) CountTokensInJSONForEncoding(data interface{}, encoding string) (int, error)
- func (t *DefaultTokenizer) CountTokensInJSONForModel(data interface{}, model string) (int, error)
- func (t *DefaultTokenizer) GetDefaultEncoding() string
- func (t *DefaultTokenizer) IsEnabled() bool
- func (t *DefaultTokenizer) SetDefaultEncoding(encoding string) error
- func (t *DefaultTokenizer) SetEnabled(enabled bool)
- type ModelEncoding
- type SavingsCalculator
- type ServerToolInfo
- type TokenSavingsMetrics
- type Tokenizer
- type ToolInfo
Constants ¶
const DefaultEncoding = "cl100k_base"
DefaultEncoding is the fallback encoding when model is not recognized
Variables ¶
This section is empty.
Functions ¶
func GetEncodingForModel ¶
func GetEncodingForModel(model string) string
GetEncodingForModel returns the appropriate encoding for a given model
func IsClaudeModel ¶
func IsClaudeModel(model string) bool
IsClaudeModel checks if a model is a Claude/Anthropic model
func SafeGetDefaultEncoding ¶ added in v0.20.1
func SafeGetDefaultEncoding(t Tokenizer) string
SafeGetDefaultEncoding extracts the default encoding from a Tokenizer interface without risking a panic. Handles a nil interface, a nil underlying *DefaultTokenizer, and non-DefaultTokenizer implementations. Returns DefaultEncoding as a fallback. See issue #318: a nil *DefaultTokenizer assigned to a Tokenizer interface creates a non-nil interface with a nil concrete value, causing panics on type assertion.
func SupportedEncodings ¶
func SupportedEncodings() []string
SupportedEncodings returns a list of all supported encodings
func SupportedModels ¶
func SupportedModels() []string
SupportedModels returns a list of all supported model names
Types ¶
type DefaultTokenizer ¶
type DefaultTokenizer struct {
// contains filtered or unexported fields
}
DefaultTokenizer implements the Tokenizer interface using tiktoken-go
func NewTokenizer ¶
func NewTokenizer(defaultEncoding string, logger *zap.SugaredLogger, enabled bool) (*DefaultTokenizer, error)
NewTokenizer creates a new tokenizer instance. When enabled=false, encoding validation is skipped so that a disabled tokenizer can always be created (even with a corrupted tiktoken cache). See issue #318.
func (*DefaultTokenizer) CountTokens ¶
func (t *DefaultTokenizer) CountTokens(text string) (int, error)
CountTokens counts tokens using the default encoding
func (*DefaultTokenizer) CountTokensForEncoding ¶
func (t *DefaultTokenizer) CountTokensForEncoding(text string, encoding string) (int, error)
CountTokensForEncoding counts tokens using a specific encoding
func (*DefaultTokenizer) CountTokensForModel ¶
func (t *DefaultTokenizer) CountTokensForModel(text string, model string) (int, error)
CountTokensForModel counts tokens for a specific model
func (*DefaultTokenizer) CountTokensInJSON ¶
func (t *DefaultTokenizer) CountTokensInJSON(data interface{}) (int, error)
CountTokensInJSON serializes data to JSON and counts tokens
func (*DefaultTokenizer) CountTokensInJSONForEncoding ¶
func (t *DefaultTokenizer) CountTokensInJSONForEncoding(data interface{}, encoding string) (int, error)
CountTokensInJSONForEncoding serializes data to JSON and counts tokens using a specific encoding
func (*DefaultTokenizer) CountTokensInJSONForModel ¶
func (t *DefaultTokenizer) CountTokensInJSONForModel(data interface{}, model string) (int, error)
CountTokensInJSONForModel serializes data to JSON and counts tokens for a model
func (*DefaultTokenizer) GetDefaultEncoding ¶
func (t *DefaultTokenizer) GetDefaultEncoding() string
GetDefaultEncoding returns the current default encoding
func (*DefaultTokenizer) IsEnabled ¶
func (t *DefaultTokenizer) IsEnabled() bool
IsEnabled returns whether token counting is enabled
func (*DefaultTokenizer) SetDefaultEncoding ¶
func (t *DefaultTokenizer) SetDefaultEncoding(encoding string) error
SetDefaultEncoding changes the default encoding
func (*DefaultTokenizer) SetEnabled ¶
func (t *DefaultTokenizer) SetEnabled(enabled bool)
SetEnabled enables or disables token counting
type ModelEncoding ¶
ModelEncoding represents the mapping between model names and their tiktoken encodings
type SavingsCalculator ¶
type SavingsCalculator struct {
// contains filtered or unexported fields
}
SavingsCalculator calculates token savings from using MCPProxy
func NewSavingsCalculator ¶
func NewSavingsCalculator(tokenizer Tokenizer, logger *zap.SugaredLogger, model string) *SavingsCalculator
NewSavingsCalculator creates a new token savings calculator
func (*SavingsCalculator) CalculateProxySavings ¶
func (sc *SavingsCalculator) CalculateProxySavings(servers []ServerToolInfo, topK int) (*TokenSavingsMetrics, error)
CalculateProxySavings calculates token savings from using MCPProxy vs listing all tools
func (*SavingsCalculator) CalculateToolListTokens ¶
func (sc *SavingsCalculator) CalculateToolListTokens(tools []ToolInfo) (int, error)
CalculateToolListTokens calculates tokens for a single server's tool list
type ServerToolInfo ¶
ServerToolInfo represents tool information for a single server
type TokenSavingsMetrics ¶
type TokenSavingsMetrics struct {
TotalServerToolListSize int `json:"total_server_tool_list_size"` // All upstream tools combined (tokens)
AverageQueryResultSize int `json:"average_query_result_size"` // Typical retrieve_tools output (tokens)
SavedTokens int `json:"saved_tokens"` // Difference
SavedTokensPercentage float64 `json:"saved_tokens_percentage"` // Percentage saved
PerServerToolListSizes map[string]int `json:"per_server_tool_list_sizes"` // Token size per server
}
TokenSavingsMetrics represents token savings data
type Tokenizer ¶
type Tokenizer interface {
// CountTokens counts tokens in text using the default encoding
CountTokens(text string) (int, error)
// CountTokensForModel counts tokens for a specific model
CountTokensForModel(text string, model string) (int, error)
// CountTokensForEncoding counts tokens using a specific encoding
CountTokensForEncoding(text string, encoding string) (int, error)
// CountTokensInJSON counts tokens in a JSON object (serialized first)
CountTokensInJSON(data interface{}) (int, error)
// CountTokensInJSONForModel counts tokens in JSON for a specific model
CountTokensInJSONForModel(data interface{}, model string) (int, error)
}
Tokenizer provides token counting functionality for various LLM models