Documentation
Overview
Package models provides a comprehensive catalog of LLM model identifiers and documentation references for all supported providers.
Constants are grouped by provider, and each provider group includes reference URLs to the official documentation pages so you can track new models, deprecations, and other changes.
Usage:
import "github.com/plexusone/omnillm/models" // Use model constants model := models.ClaudeOpus4 model := models.GPT4o model := models.Grok4_1FastReasoning // Reference documentation URLs for updates fmt.Println(models.AnthropicModelsURL) fmt.Println(models.OpenAIModelsURL)
Index
Constants
const (
	// AnthropicModelsURL is the official Anthropic models documentation page.
	// Use this to check for new models, deprecations, and model updates.
	AnthropicModelsURL = "https://platform.claude.com/docs/en/about-claude/models/overview"

	// AnthropicAPIURL is the Anthropic API reference page.
	AnthropicAPIURL = "https://docs.anthropic.com/en/api"
)
Anthropic Claude Model Documentation
const (
	ClaudeOpus4_5   = "claude-opus-4-5-20251101"   // Claude Opus 4.5 (November 2025)
	ClaudeSonnet4_5 = "claude-sonnet-4-5-20250929" // Claude Sonnet 4.5 (September 2025)
	ClaudeHaiku4_5  = "claude-haiku-4-5-20251001"  // Claude Haiku 4.5 (October 2025)
)
Claude 4.5 Family (Latest)
const (
	ClaudeOpus4_1 = "claude-opus-4-1-20250805" // Claude Opus 4.1 (August 2025)
	ClaudeOpus4   = "claude-opus-4-20250514"   // Claude Opus 4 (May 2025)
)
Claude Opus 4 Family
const (
	Claude3Opus   = "claude-3-opus-20240229"   // Claude 3 Opus (February 2024)
	Claude3Sonnet = "claude-3-sonnet-20240229" // Claude 3 Sonnet (February 2024)
	Claude3Haiku  = "claude-3-haiku-20240307"  // Claude 3 Haiku (March 2024)
)
Claude 3 Family
const (
	// BedrockModelsURL is the official AWS Bedrock models documentation page.
	// Use this to check for new models, deprecations, and model updates.
	BedrockModelsURL = "https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html"

	// BedrockAPIURL is the AWS Bedrock API reference page.
	BedrockAPIURL = "https://docs.aws.amazon.com/bedrock/latest/APIReference/welcome.html"
)
AWS Bedrock Model Documentation
const (
	// BedrockClaudeOpus4 is Claude Opus 4 on AWS Bedrock.
	BedrockClaudeOpus4 = "anthropic.claude-opus-4-20250514-v1:0"

	// BedrockClaude3Opus is Claude 3 Opus on AWS Bedrock.
	BedrockClaude3Opus = "anthropic.claude-3-opus-20240229-v1:0"

	// BedrockClaude3Sonnet is Claude 3 Sonnet on AWS Bedrock.
	BedrockClaude3Sonnet = "anthropic.claude-3-sonnet-20240229-v1:0"
)
Bedrock Claude Models
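The Bedrock identifiers above follow a vendor.model-revision pattern: a vendor prefix ("anthropic"), the dated model name, and a trailing "-v1:0" revision. A small sketch that splits an identifier into those parts; the helper name is mine, not part of this package:

	package main

	import (
		"fmt"
		"strings"
	)

	// splitBedrockID is a hypothetical helper: it separates the vendor
	// prefix from the rest of a Bedrock model identifier, and the
	// ":0"-style revision from the model name.
	func splitBedrockID(id string) (vendor, model, revision string) {
		vendor, rest, _ := strings.Cut(id, ".")
		model, revision, _ = strings.Cut(rest, ":")
		return vendor, model, revision
	}

	func main() {
		v, m, r := splitBedrockID("anthropic.claude-opus-4-20250514-v1:0")
		fmt.Println(v, m, r) // anthropic claude-opus-4-20250514-v1 0
	}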
const (
	// GeminiModelsURL is the official Google Gemini models documentation page.
	// Use this to check for new models, deprecations, and model updates.
	GeminiModelsURL = "https://ai.google.dev/gemini-api/docs/models/gemini"

	// GeminiAPIURL is the Google Gemini API reference page.
	GeminiAPIURL = "https://ai.google.dev/gemini-api/docs"
)
Google Gemini Model Documentation
const (
	// Gemini2_5Pro is stable with advanced reasoning capabilities.
	Gemini2_5Pro = "gemini-2.5-pro"

	// Gemini2_5Flash is stable with balanced performance.
	Gemini2_5Flash = "gemini-2.5-flash"

	// GeminiLive2_5Flash is the stable Live API model (private GA).
	GeminiLive2_5Flash = "gemini-live-2.5-flash"
)
Gemini 2.5 Family (Latest)
const (
	Gemini1_5Pro   = "gemini-1.5-pro"   // Gemini 1.5 Pro
	Gemini1_5Flash = "gemini-1.5-flash" // Gemini 1.5 Flash
)
Gemini 1.5 Family
const (
	// GLMModelsURL is the official Zhipu AI models documentation page.
	// Use this to check for new models, deprecations, and model updates.
	GLMModelsURL = "https://bigmodel.cn/dev/api/normal-model/glm-4"

	// GLMAPIURL is the GLM API reference page.
	GLMAPIURL = "https://open.bigmodel.cn/dev/howuse/introduction"
)
GLM (Zhipu AI BigModel) Model Documentation
const (
	// GLM5 is the flagship model with MoE architecture (744B/40B active).
	// Agentic engineering, 200K context, forced thinking mode.
	GLM5 = "glm-5"

	// GLM5Code is the code-specialized variant of GLM-5.
	// Optimized for programming, 200K context, forced thinking.
	GLM5Code = "glm-5-code"
)
GLM-5 Series - Flagship models with MoE architecture
const (
	// GLM4_7 is the premium model with Interleaved Thinking, 200K context.
	GLM4_7 = "glm-4.7"

	// GLM4_7FlashX is the high-speed paid version with priority GPU access.
	// 200K context, hybrid thinking, best price/performance for batch tasks.
	GLM4_7FlashX = "glm-4.7-flashx"

	// GLM4_7Flash is a free SOTA model with 200K context and hybrid thinking.
	// 1 concurrent request limit, ideal for prototyping.
	GLM4_7Flash = "glm-4.7-flash"
)
GLM-4.7 Series - Premium with Interleaved Thinking
const (
	// GLM4_5 is the first unified model with reasoning/coding/agent capabilities.
	// MoE 355B/32B active, 128K context, auto-thinking.
	GLM4_5 = "glm-4.5"

	// GLM4_5X is the ultra-fast premium version with lowest latency.
	// 128K context, auto-thinking.
	GLM4_5X = "glm-4.5-x"

	// GLM4_5Air is the cost-effective lightweight model.
	// MoE 106B/12B active, 128K context, auto-thinking.
	GLM4_5Air = "glm-4.5-air"

	// GLM4_5AirX is the accelerated Air version with priority GPU access.
	GLM4_5AirX = "glm-4.5-airx"

	// GLM4_5Flash is a free model with reasoning/coding/agents support.
	// 128K context, auto-thinking, function calling enabled.
	GLM4_5Flash = "glm-4.5-flash"
)
GLM-4.5 Series - Unified Reasoning, Coding, and Agents
const (
	// KimiModelsURL is the official Moonshot AI models documentation page.
	// Use this to check for new models, deprecations, and model updates.
	KimiModelsURL = "https://platform.moonshot.cn/docs/api/chat"

	// KimiAPIURL is the Kimi API reference page.
	KimiAPIURL = "https://platform.moonshot.cn/docs"
)
Kimi (Moonshot AI) Model Documentation
const (
	// KimiK2_0905 is an enhanced agentic coding model with improved frontend code
	// quality and context understanding, 256k context window.
	KimiK2_0905 = "kimi-k2-0905-preview"

	// KimiK2_0711 is the MoE base model with powerful code and agent capabilities,
	// 128k context window.
	KimiK2_0711 = "kimi-k2-0711-preview"

	// KimiK2Turbo is the high-speed version of K2-0905, 60-100 tokens/sec output
	// speed, 256k context window.
	KimiK2Turbo = "kimi-k2-turbo-preview"

	// KimiK2Thinking is a long-term thinking model with multi-step tool usage
	// and deep reasoning, 256k context window.
	KimiK2Thinking = "kimi-k2-thinking"

	// KimiK2ThinkingTurbo is the high-speed thinking model, 60-100 tokens/sec,
	// excels at deep reasoning, 256k context window.
	KimiK2ThinkingTurbo = "kimi-k2-thinking-turbo"
)
Kimi K2 series - MoE foundation models with 1T total params, 32B activated
const (
	// MoonshotV1_8K is suitable for generating short texts, with an 8k context window.
	MoonshotV1_8K = "moonshot-v1-8k"

	// MoonshotV1_32K is suitable for generating long texts, with a 32k context window.
	MoonshotV1_32K = "moonshot-v1-32k"

	// MoonshotV1_128K is suitable for generating very long texts, with a 128k context window.
	MoonshotV1_128K = "moonshot-v1-128k"
)
Moonshot V1 series - General text generation models
const (
	// MoonshotV1_8KVision understands image content and outputs text, 8k context.
	MoonshotV1_8KVision = "moonshot-v1-8k-vision-preview"

	// MoonshotV1_32KVision understands image content and outputs text, 32k context.
	MoonshotV1_32KVision = "moonshot-v1-32k-vision-preview"

	// MoonshotV1_128KVision understands image content and outputs text, 128k context.
	MoonshotV1_128KVision = "moonshot-v1-128k-vision-preview"
)
Moonshot V1 Vision series - Multimodal models with image understanding
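Since the vision variants above differ only in context window (8k, 32k, 128k), a call site can pick the smallest model that fits the prompt. A sketch under that assumption; the helper and thresholds are illustrative, not part of this package:

	package main

	import (
		"fmt"

		"github.com/plexusone/omnillm/models"
	)

	// visionModelFor picks the smallest Moonshot vision model whose documented
	// context window covers the given prompt size. Hypothetical helper; the
	// 8k/32k/128k thresholds come from the doc comments above.
	func visionModelFor(promptTokens int) (string, error) {
		switch {
		case promptTokens <= 8_000:
			return models.MoonshotV1_8KVision, nil
		case promptTokens <= 32_000:
			return models.MoonshotV1_32KVision, nil
		case promptTokens <= 128_000:
			return models.MoonshotV1_128KVision, nil
		default:
			return "", fmt.Errorf("prompt of %d tokens exceeds the largest context window", promptTokens)
		}
	}

	func main() {
		id, err := visionModelFor(20_000)
		if err != nil {
			panic(err)
		}
		fmt.Println(id) // moonshot-v1-32k-vision-preview
	}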
const (
	// OllamaModelsURL is the official Ollama models library page.
	// Use this to check for new models and model updates.
	OllamaModelsURL = "https://ollama.com/library"

	// OllamaAPIURL is the Ollama API reference page.
	OllamaAPIURL = "https://github.com/ollama/ollama/blob/main/docs/api.md"
)
Ollama Model Documentation
const (
	OllamaLlama3_8B  = "llama3:8b"  // Llama 3 8B
	OllamaLlama3_70B = "llama3:70b" // Llama 3 70B
)
Ollama Llama Models
const (
	OllamaMistral7B   = "mistral:7b"   // Mistral 7B
	OllamaMixtral8x7B = "mixtral:8x7b" // Mixtral 8x7B
)
Ollama Mistral Models
const (
	OllamaCodeLlama = "codellama:13b"       // CodeLlama 13B
	OllamaDeepSeek  = "deepseek-coder:6.7b" // DeepSeek Coder 6.7B
)
Ollama Code Models
const (
	OllamaGemma2B = "gemma:2b" // Gemma 2B
	OllamaGemma7B = "gemma:7b" // Gemma 7B
)
Ollama Gemma Models
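Unlike the dated provider identifiers above, the Ollama constants use the library's name:tag form, where the tag encodes the parameter size. A tiny sketch splitting that form; the loop and its inputs are illustrative only:

	package main

	import (
		"fmt"
		"strings"
	)

	func main() {
		// Each Ollama identifier is "name:tag"; the tag selects the
		// model size (e.g. 8b, 8x7b, 6.7b).
		for _, id := range []string{"llama3:8b", "mixtral:8x7b", "deepseek-coder:6.7b"} {
			name, tag, _ := strings.Cut(id, ":")
			fmt.Printf("model=%s size=%s\n", name, tag)
		}
	}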
const (
	// OpenAIModelsURL is the official OpenAI models documentation page.
	// Use this to check for new models, deprecations, and model updates.
	OpenAIModelsURL = "https://platform.openai.com/docs/models"

	// OpenAIAPIURL is the OpenAI API reference page.
	OpenAIAPIURL = "https://platform.openai.com/docs/api-reference"
)
OpenAI Model Documentation
const (
	GPT5           = "gpt-5"             // GPT-5 latest
	GPT5Mini       = "gpt-5-mini"        // GPT-5 Mini
	GPT5Nano       = "gpt-5-nano"        // GPT-5 Nano
	GPT5ChatLatest = "gpt-5-chat-latest" // GPT-5 Chat Latest
)
GPT-5 Family (Latest)
const (
	GPT4_1     = "gpt-4.1"      // GPT-4.1
	GPT4_1Mini = "gpt-4.1-mini" // GPT-4.1 Mini
	GPT4_1Nano = "gpt-4.1-nano" // GPT-4.1 Nano
)
GPT-4.1 Family
const (
	GPT4o     = "gpt-4o"      // GPT-4o
	GPT4oMini = "gpt-4o-mini" // GPT-4o Mini
)
GPT-4o Family
const (
	// QwenModelsURL is the official Qwen model documentation page.
	// Use this to check for new models, deprecations, and model updates.
	QwenModelsURL = "https://help.aliyun.com/zh/model-studio/getting-started/models"

	// QwenAPIURL is the Qwen DashScope API reference page.
	QwenAPIURL = "https://help.aliyun.com/zh/model-studio/developer-reference/use-qwen-by-calling-api"
)
Qwen (Alibaba Cloud Tongyi Qianwen) Model Documentation
const (
	// Qwen3Max is the latest flagship reasoning model with thinking capability.
	// Strong instruction following and complex task handling.
	Qwen3Max = "qwen3-max"

	// Qwen3MaxPreview is the preview version with extended thinking capabilities.
	Qwen3MaxPreview = "qwen3-max-preview"

	// QwenMax is the flagship reasoning model (International/China regions only).
	QwenMax = "qwen-max"
)
Qwen3 Flagship series - Wide availability (International/Global/China)
const (
	// Qwen3_5Plus is an advanced balanced model with thinking mode support.
	Qwen3_5Plus = "qwen3.5-plus"

	// QwenPlus is the balanced performance model with thinking mode.
	// Wide availability including US region.
	QwenPlus = "qwen-plus"

	// Qwen3_5Flash is an ultra-fast lightweight model for high throughput.
	Qwen3_5Flash = "qwen3.5-flash"

	// QwenFlash is the fast lightweight model with context caching.
	// Wide availability including US region.
	QwenFlash = "qwen-flash"

	// QwenTurbo is the fastest lightweight model (see the migration sketch below).
	//
	// Deprecated: Use QwenFlash instead.
	QwenTurbo = "qwen-turbo"
)
Qwen3.5 series - Balanced models
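Because QwenTurbo above is deprecated in favor of QwenFlash, call sites can normalize identifiers before use. A minimal sketch; the replacement map and helper are illustrative, not part of this package:

	package main

	import (
		"fmt"

		"github.com/plexusone/omnillm/models"
	)

	// qwenReplacements maps deprecated Qwen identifiers to their documented
	// successors; only the QwenTurbo -> QwenFlash migration is documented here.
	var qwenReplacements = map[string]string{
		models.QwenTurbo: models.QwenFlash,
	}

	// normalizeQwen rewrites a possibly deprecated identifier to its
	// current form. Hypothetical helper, not part of this package.
	func normalizeQwen(id string) string {
		if current, ok := qwenReplacements[id]; ok {
			return current
		}
		return id
	}

	func main() {
		fmt.Println(normalizeQwen(models.QwenTurbo)) // qwen-flash
		fmt.Println(normalizeQwen(models.QwenPlus))  // qwen-plus (unchanged)
	}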
const (
	// QwQPlus is the deep reasoning model with extended chain-of-thought.
	QwQPlus = "qwq-plus"

	// QwQ32B is the open-source 32B reasoning model with powerful logic capabilities.
	QwQ32B = "qwq-32b"
)
QwQ series - Deep reasoning models
const (
	// Qwen3_5_397B is the largest open-source model with 397B parameters.
	Qwen3_5_397B = "qwen3.5-397b-a17b"

	// Qwen3_5_122B is the large open-source model with 122B parameters.
	Qwen3_5_122B = "qwen3.5-122b-a10b"

	// Qwen3_5_27B is the medium open-source model with 27B parameters.
	Qwen3_5_27B = "qwen3.5-27b"

	// Qwen3_5_35B is the efficient open-source model with 35B parameters.
	Qwen3_5_35B = "qwen3.5-35b-a3b"
)
Open source Qwen3.5 series
const (
	// Qwen3_235B is the dual-mode 235B model supporting thinking and non-thinking.
	Qwen3_235B = "qwen3-235b-a22b"

	// Qwen3_32B is the versatile 32B model with dual-mode capabilities.
	Qwen3_32B = "qwen3-32b"

	// Qwen3_30B is the efficient 30B model with MoE architecture.
	Qwen3_30B = "qwen3-30b-a3b"

	// Qwen3_14B is the medium-sized 14B model with good performance-cost balance.
	Qwen3_14B = "qwen3-14b"

	// Qwen3_8B is the compact 8B model optimized for efficiency.
	Qwen3_8B = "qwen3-8b"
)
Open source Qwen3 series
const (
	// Qwen2_5_72B is the large 72B instruction-following model.
	Qwen2_5_72B = "qwen2.5-72b-instruct"

	// Qwen2_5_32B is the medium 32B instruction-following model.
	Qwen2_5_32B = "qwen2.5-32b-instruct"

	// Qwen2_5_14B is the compact 14B instruction-following model.
	Qwen2_5_14B = "qwen2.5-14b-instruct"

	// Qwen2_5_7B is the small 7B instruction-following model.
	Qwen2_5_7B = "qwen2.5-7b-instruct"

	// Qwen2_5_14B_1M is the extended-context 14B model supporting up to 1M tokens.
	Qwen2_5_14B_1M = "qwen2.5-14b-instruct-1m"

	// Qwen2_5_7B_1M is the extended-context 7B model supporting up to 1M tokens.
	Qwen2_5_7B_1M = "qwen2.5-7b-instruct-1m"
)
Open source Qwen2.5 series
const (
	// VertexModelsURL is the official Google Vertex AI models documentation page.
	// Use this to check for new models, deprecations, and model updates.
	VertexModelsURL = "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models"

	// VertexAPIURL is the Google Vertex AI API reference page.
	VertexAPIURL = "https://cloud.google.com/vertex-ai/generative-ai/docs/reference/rest"
)
Google Vertex AI Model Documentation
const (
	// XAIModelsURL is the official X.AI models documentation page.
	// Use this to check for new models, deprecations, and model updates.
	XAIModelsURL = "https://docs.x.ai/docs/models"

	// XAIAPIURL is the X.AI API reference page.
	XAIAPIURL = "https://docs.x.ai/docs"
)
X.AI Grok Model Documentation
const (
	// Grok4_1FastReasoning is the best tool-calling model with 2M context window.
	// Optimized for maximum intelligence and agentic tool calling.
	Grok4_1FastReasoning = "grok-4-1-fast-reasoning"

	// Grok4_1FastNonReasoning provides instant responses with 2M context window.
	// Optimized for speed without reasoning overhead.
	Grok4_1FastNonReasoning = "grok-4-1-fast-non-reasoning"
)
Grok 4.1 Family (Latest - November 2025)
const (
	// Grok4_0709 is the flagship Grok 4 model with 256K context window.
	// Released July 9, 2025. Provides high-quality reasoning.
	Grok4_0709 = "grok-4-0709"

	// Grok4FastReasoning provides fast reasoning with 2M context window.
	// Performance on par with grok-4-0709 with larger context.
	Grok4FastReasoning = "grok-4-fast-reasoning"

	// Grok4FastNonReasoning provides fast non-reasoning with 2M context window.
	// Optimized for speed without reasoning overhead.
	Grok4FastNonReasoning = "grok-4-fast-non-reasoning"

	// GrokCodeFast1 is optimized for agentic coding with 256K context window.
	// Speedy and economical reasoning model for coding tasks.
	GrokCodeFast1 = "grok-code-fast-1"
)
Grok 4 Family (July 2025)
const (
	Grok3     = "grok-3"      // Grok 3
	Grok3Mini = "grok-3-mini" // Grok 3 Mini (smaller, faster)
)
Grok 3 Family
const (
	Grok2_1212   = "grok-2-1212"        // Grok 2 (December 2024)
	Grok2_Vision = "grok-2-vision-1212" // Grok 2 with vision capabilities
)
Grok 2 Family
const (
	// GrokBeta is deprecated (see the migration sketch below).
	//
	// Deprecated: Use Grok3 or Grok4_1FastReasoning instead.
	GrokBeta = "grok-beta"

	// GrokVision is deprecated.
	//
	// Deprecated: Use Grok2_Vision instead.
	GrokVision = "grok-vision-beta"
)
Deprecated Grok Models
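The deprecation notes above name concrete replacements, so the same normalization pattern shown for Qwen applies here; only the lookup table changes. A sketch, with the map being illustrative rather than part of the package:

	// grokReplacements follows the deprecation notes above: grok-beta moves
	// to Grok 3 (or Grok4_1FastReasoning for agentic work), and
	// grok-vision-beta moves to Grok 2 Vision. Illustrative only.
	var grokReplacements = map[string]string{
		models.GrokBeta:   models.Grok3,
		models.GrokVision: models.Grok2_Vision,
	}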
const (
// BedrockTitan is Amazon Titan Text Express on AWS Bedrock.
BedrockTitan = "amazon.titan-text-express-v1"
)
Bedrock Amazon Titan Models
const (
Claude3_5Haiku = "claude-3-5-haiku-20241022" // Claude 3.5 Haiku (October 2024)
)
Claude 3.5 Family
const (
Claude3_7Sonnet = "claude-3-7-sonnet-20250219" // Claude 3.7 Sonnet (February 2025)
)
Claude 3.7 Sonnet
const (
ClaudeSonnet4 = "claude-sonnet-4-20250514" // Claude Sonnet 4 (May 2025)
)
Claude Sonnet 4 Family
const (
// GLM4_32B is an ultra-budget dense 32B model, 128K context, no thinking mode.
GLM4_32B = "glm-4-32b-0414-128k"
)
GLM-4 Legacy - Dense Architecture
const (
// GLM4_6 is a balanced model with 200K context and auto-thinking.
GLM4_6 = "glm-4.6"
)
GLM-4.6 Series - Balanced with Auto Thinking
const (
GPT35Turbo = "gpt-3.5-turbo" // GPT-3.5 Turbo
)
GPT-3.5 Family
const (
GPT4Turbo = "gpt-4-turbo" // GPT-4 Turbo
)
GPT-4 Family
const (
GeminiPro = "gemini-pro" // Legacy Gemini Pro (use Gemini 2.5 instead)
)
Legacy Gemini Models
const (
	// KimiK2_5 is the most intelligent model with native multimodal architecture.
	// Supports visual/text input, thinking/non-thinking modes, 256k context.
	KimiK2_5 = "kimi-k2.5"
)
Kimi K2.5 series - Most intelligent and versatile multimodal model
const (
OllamaQwen2_5 = "qwen2.5:7b" // Qwen 2.5 7B
)
Ollama Other Models
const (
// VertexClaudeOpus4 is Claude Opus 4 on Google Vertex AI.
VertexClaudeOpus4 = "claude-opus-4@20250514"
)
Vertex AI Claude Models
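Note that the same Claude Opus 4 snapshot appears under three identifier schemes in this package: the direct Anthropic API (ClaudeOpus4), AWS Bedrock (BedrockClaudeOpus4, vendor-prefixed with a "-v1:0" revision), and Vertex AI (VertexClaudeOpus4, with an "@"-separated version). A sketch printing the three side by side, assuming only the constants documented above:

	package main

	import (
		"fmt"

		"github.com/plexusone/omnillm/models"
	)

	func main() {
		// One model, three provider-specific identifier formats.
		fmt.Println("anthropic:", models.ClaudeOpus4)        // claude-opus-4-20250514
		fmt.Println("bedrock:  ", models.BedrockClaudeOpus4) // anthropic.claude-opus-4-20250514-v1:0
		fmt.Println("vertex:   ", models.VertexClaudeOpus4)  // claude-opus-4@20250514
	}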
Variables
This section is empty.
Functions
This section is empty.
Types
This section is empty.