llmtests

package
v1.5.5 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 28, 2026 License: Apache-2.0 Imports: 35 Imported by: 0

Documentation

Overview

Package llmtests provides comprehensive test account and configuration management for the Bifrost system. It implements account functionality for testing purposes, supporting multiple AI providers and comprehensive test scenarios.

Package llmtests provides batch API test utilities for the Bifrost system.

Package llmtests provides container API test utilities for the Bifrost system.

Package llmtests provides comprehensive test utilities and configurations for the Bifrost system. It includes comprehensive test implementations covering all major AI provider scenarios, including text completion, chat, tool calling, image processing, and end-to-end workflows.

Index

Constants

View Source
// Shared test texts for TTS->STT (text-to-speech, then speech-to-text)
// round-trip validation. The literal values are elided in this view; only
// their byte lengths are shown.
const (
	// TTSTestTextBasic is the basic test text for simple round-trip validation.
	TTSTestTextBasic = "" /* 342-byte string literal not displayed */

	// TTSTestTextMedium is a medium-length text with punctuation for
	// comprehensive testing.
	TTSTestTextMedium = "" /* 616-byte string literal not displayed */

	// TTSTestTextTechnical is a technical text for comprehensive format testing.
	TTSTestTextTechnical = "" /* 799-byte string literal not displayed */
)

Shared test texts for TTS->STT (text-to-speech to speech-to-text) round-trip validation

View Source
// Concurrency is an untyped integer constant fixed at 4.
// NOTE(review): presumably the number of test operations allowed to run in
// parallel — confirm against its call sites.
const Concurrency = 4
View Source
// HelloWorldPDFBase64 is a data URI ("data:application/pdf;base64,...") that
// carries a base64-encoded, minimal PDF 1.7 document whose single page draws
// the text "Hello World!". It is meant for testing document input
// functionality.
//
// The literal is split across lines for readability only; the pieces
// concatenate into a single unpadded base64 payload. Object 2 of the document
// is the page tree node, so its dictionary must read "/Type /Pages"
// (base64 group "ZXMK"); a one-character slip there ("ZXwK") decodes to
// "/Type /Page|" and yields a malformed page tree.
const HelloWorldPDFBase64 = "data:application/pdf;base64,JVBERi0xLjcKCjEgMCBvYmogICUgZW50cnkgcG9pbnQKPDwKICAvVHlwZSAvQ2F0YWxvZwogIC" +
	"9QYWdlcyAyIDAgUgo+PgplbmRvYmoKCjIgMCBvYmoKPDwKICAvVHlwZSAvUGFnZXMKICAvTWV" +
	"kaWFCb3ggWyAwIDAgMjAwIDIwMCBdCiAgL0NvdW50IDEKICAvS2lkcyBbIDMgMCBSIF0KPj4K" +
	"ZW5kb2JqCgozIDAgb2JqCjw8CiAgL1R5cGUgL1BhZ2UKICAvUGFyZW50IDIgMCBSCiAgL1Jlc" +
	"291cmNlcyA8PAogICAgL0ZvbnQgPDwKICAgICAgL0YxIDQgMCBSCj4+CiAgPj4KICAvQ29udG" +
	"VudHMgNSAwIFIKPj4KZW5kb2JqCgo0IDAgb2JqCjw8CiAgL1R5cGUgL0ZvbnQKICAvU3VidHl" +
	"wZSAvVHlwZTEKICAvQmFzZUZvbnQgL1RpbWVzLVJvbWFuCj4+CmVuZG9iagoKNSAwIG9iago8" +
	"PAogIC9MZW5ndGggNDQKPj4Kc3RyZWFtCkJUCjcwIDUwIFRECi9GMSAxMiBUZgooSGVsbG8gV" +
	"29ybGQhKSBUagpFVAplbmRzdHJlYW0KZW5kb2JqCgp4cmVmCjAgNgowMDAwMDAwMDAwIDY1NT" +
	"M1IGYgCjAwMDAwMDAwMTAgMDAwMDAgbiAKMDAwMDAwMDA2MCAwMDAwMCBuIAowMDAwMDAwMTU" +
	"3IDAwMDAwIG4gCjAwMDAwMDAyNTUgMDAwMDAgbiAKMDAwMDAwMDM1MyAwMDAwMCBuIAp0cmFp" +
	"bGVyCjw8CiAgL1NpemUgNgogIC9Sb290IDEgMCBSCj4+CnN0YXJ0eHJlZgo0NDkKJSVFT0YK"

HelloWorldPDFBase64 is a base64 encoded PDF file containing "Hello World!" text. This is a minimal valid PDF for testing document input functionality.

View Source
// ProviderOpenAICustom represents the custom OpenAI provider for testing.
// Its value is the string "openai-custom" converted to schemas.ModelProvider.
const ProviderOpenAICustom = schemas.ModelProvider("openai-custom")

ProviderOpenAICustom represents the custom OpenAI provider for testing

View Source
// TestFileURL is the URL of the remote PDF document used as the test file.
const TestFileURL = "https://www.berkshirehathaway.com/letters/2024ltr.pdf"

TestFileURL is the URL of the PDF document used as the test file.

View Source
// TestImageBase64 is the test image, a grey solid, in base64 form.
// The 407-byte literal is elided in this view.
const TestImageBase64 = "" /* 407-byte string literal not displayed */

Test image base64 of a grey solid

View Source
// TestImageURL is the URL of a test image of an ant.
const TestImageURL = "https://pestworldcdn-dcf2a8gbggazaghf.z01.azurefd.net/media/561791/carpenter-ant4.jpg"

Test image of an ant

View Source
// TestImageURL2 is the URL of a test image of the Eiffel Tower.
// The 130-byte literal is elided in this view.
const TestImageURL2 = "" /* 130-byte string literal not displayed */

Test image of the Eiffel Tower

View Source
// Constants for test configuration.
const (
	// TestTimeout defines the maximum duration for comprehensive tests.
	// It is set to 20 minutes to allow for complex multi-step operations.
	TestTimeout = 20 * time.Minute
)

Constants for test configuration

Variables

View Source
// Predefined ErrorCategory values, one per kind of failure.
// Each literal is positional and supplies three strings; judging by the values
// they are a short name, a human-readable description and an emoji marker.
// NOTE(review): the field names live on ErrorCategory, which is declared
// elsewhere — confirm the order against that type.
var (
	// Common error categories
	CategoryHTTP       = ErrorCategory{"HTTP", "HTTP/Network Error", "🔴"}
	CategoryAuth       = ErrorCategory{"Authentication", "Authentication/Authorization Error", "🔐"}
	CategoryRateLimit  = ErrorCategory{"Rate Limit", "Rate Limiting Error", "⏱️"}
	CategoryProvider   = ErrorCategory{"Provider", "Provider-Specific Error", "⚠️"}
	CategoryValidation = ErrorCategory{"Validation", "Input Validation Error", "📋"}
	CategoryTimeout    = ErrorCategory{"Timeout", "Request Timeout Error", "⏰"}
	CategoryQuota      = ErrorCategory{"Quota", "Quota/Billing Error", "💳"}
	CategoryModel      = ErrorCategory{"Model", "Model-Related Error", "🤖"}
	CategoryBifrost    = ErrorCategory{"Bifrost", "Bifrost Internal Error", "🌉"}
	// CategoryUnknown is the category for errors that fit none of the above.
	CategoryUnknown    = ErrorCategory{"Unknown", "Unknown Error", "❓"}
)
View Source
// AllProviderConfigs contains test configurations for all providers.
//
// Each entry names a provider, the model identifier to use per capability
// (chat, text, transcription, image, ...), and a TestScenarios value made of
// boolean flags, one per test scenario. Fields omitted from a literal keep
// their zero value ("" for models, false for scenario flags).
//
// Most entries also list a Fallbacks slice; the Fireworks, XAI and Replicate
// entries do not.
// NOTE(review): how the flags and fallbacks are consumed is not visible in
// this declaration — confirm against the test runner.
var AllProviderConfigs = []ComprehensiveTestConfig{
	// OpenAI. The only entry that sets the Container* scenario flags.
	{
		Provider:             schemas.OpenAI,
		ChatModel:            "gpt-4o-mini",
		TextModel:            "",
		ReasoningModel:       "o1-mini",
		PromptCachingModel:   "gpt-4.1",
		TranscriptionModel:   "whisper-1",
		SpeechSynthesisModel: "tts-1",
		ImageGenerationModel: "gpt-image-1",
		ImageEditModel:       "dall-e-2",
		ImageVariationModel:  "dall-e-2",
		ChatAudioModel:       "gpt-4o-mini-audio-preview",
		Scenarios: TestScenarios{
			TextCompletion:             false,
			TextCompletionStream:       false,
			SimpleChat:                 true,
			CompletionStream:           true,
			MultiTurnConversation:      true,
			ToolCalls:                  true,
			MultipleToolCalls:          true,
			MultipleToolCallsStreaming: true,
			End2EndToolCalling:         true,
			AutomaticFunctionCall:      true,
			ImageURL:                   true,
			ImageBase64:                true,
			MultipleImages:             true,
			CompleteEnd2End:            true,
			SpeechSynthesis:            true,
			SpeechSynthesisStream:      true,
			Transcription:              true,
			TranscriptionStream:        true,
			ImageGeneration:            true,
			ImageGenerationStream:      true,
			ImageEdit:                  true,
			ImageEditStream:            true,
			ImageVariation:             true,
			ImageVariationStream:       false,
			Embedding:                  true,
			Reasoning:                  true,
			ListModels:                 true,
			BatchCreate:                true,
			BatchList:                  true,
			BatchRetrieve:              true,
			BatchCancel:                true,
			BatchResults:               true,
			FileUpload:                 true,
			FileList:                   true,
			FileRetrieve:               true,
			FileDelete:                 true,
			FileContent:                true,
			ChatAudio:                  true,
			ContainerCreate:            true,
			ContainerList:              true,
			ContainerRetrieve:          true,
			ContainerDelete:            true,
			ContainerFileCreate:        true,
			ContainerFileList:          true,
			ContainerFileRetrieve:      true,
			ContainerFileContent:       true,
			ContainerFileDelete:        true,
		},
		Fallbacks: []schemas.Fallback{
			{Provider: schemas.Anthropic, Model: "claude-3-7-sonnet-20250219"},
		},
	},
	// Anthropic.
	{
		Provider:  schemas.Anthropic,
		ChatModel: "claude-3-7-sonnet-20250219",
		TextModel: "",
		Scenarios: TestScenarios{
			TextCompletion:             false,
			SimpleChat:                 true,
			CompletionStream:           true,
			MultiTurnConversation:      true,
			ToolCalls:                  true,
			MultipleToolCalls:          true,
			MultipleToolCallsStreaming: true,
			End2EndToolCalling:         true,
			AutomaticFunctionCall:      true,
			ImageURL:                   true,
			ImageBase64:                true,
			MultipleImages:             true,
			CompleteEnd2End:            true,
			PromptCaching:              true,
			SpeechSynthesis:            false,
			SpeechSynthesisStream:      false,
			Transcription:              false,
			TranscriptionStream:        false,
			Embedding:                  false,
			ImageGeneration:            false,
			ImageGenerationStream:      false,
			ImageEdit:                  false,
			ImageEditStream:            false,
			ImageVariation:             false,
			ImageVariationStream:       false,
			ListModels:                 true,
			BatchCreate:                true,
			BatchList:                  true,
			BatchRetrieve:              true,
			BatchCancel:                true,
			BatchResults:               true,
		},
		Fallbacks: []schemas.Fallback{
			{Provider: schemas.OpenAI, Model: "gpt-4o-mini"},
		},
	},
	// Bedrock.
	{
		Provider:            schemas.Bedrock,
		ChatModel:           "anthropic.claude-3-sonnet-20240229-v1:0",
		TextModel:           "",
		ImageEditModel:      "amazon.titan-image-generator-v1",
		ImageVariationModel: "amazon.titan-image-generator-v1",
		Scenarios: TestScenarios{
			TextCompletion:             false,
			SimpleChat:                 true,
			CompletionStream:           true,
			MultiTurnConversation:      true,
			ToolCalls:                  true,
			MultipleToolCalls:          true,
			MultipleToolCallsStreaming: true,
			End2EndToolCalling:         true,
			AutomaticFunctionCall:      true,
			ImageURL:                   true,
			ImageBase64:                true,
			MultipleImages:             true,
			CompleteEnd2End:            true,
			PromptCaching:              true,
			SpeechSynthesis:            false,
			SpeechSynthesisStream:      false,
			Transcription:              false,
			TranscriptionStream:        false,
			Embedding:                  true,
			ImageGeneration:            false,
			ImageGenerationStream:      false,
			ImageEdit:                  true,
			ImageEditStream:            false,
			ImageVariation:             true,
			ImageVariationStream:       false,
			ListModels:                 true,
			BatchCreate:                true,
			BatchList:                  true,
			BatchRetrieve:              true,
			BatchCancel:                true,
			BatchResults:               true,
			FileUpload:                 true,
			FileList:                   true,
			FileRetrieve:               true,
			FileDelete:                 true,
			FileContent:                true,
		},
		Fallbacks: []schemas.Fallback{
			{Provider: schemas.OpenAI, Model: "gpt-4o-mini"},
		},
	},
	// Cohere.
	{
		Provider:  schemas.Cohere,
		ChatModel: "command-a-03-2025",
		TextModel: "",
		Scenarios: TestScenarios{
			TextCompletion:             false,
			SimpleChat:                 true,
			CompletionStream:           true,
			MultiTurnConversation:      true,
			ToolCalls:                  true,
			MultipleToolCalls:          true,
			MultipleToolCallsStreaming: true,
			End2EndToolCalling:         true,
			AutomaticFunctionCall:      false,
			ImageURL:                   false,
			ImageBase64:                false,
			MultipleImages:             false,
			CompleteEnd2End:            true,
			ImageGeneration:            false,
			ImageGenerationStream:      false,
			ImageEdit:                  false,
			ImageEditStream:            false,
			ImageVariation:             false,
			ImageVariationStream:       false,
			SpeechSynthesis:            false,
			SpeechSynthesisStream:      false,
			Transcription:              false,
			TranscriptionStream:        false,
			Embedding:                  true,
			ListModels:                 true,
		},
		Fallbacks: []schemas.Fallback{
			{Provider: schemas.OpenAI, Model: "gpt-4o-mini"},
		},
	},
	// Azure. The only entry that sets DisableParallelFor.
	{
		Provider:             schemas.Azure,
		ChatModel:            "gpt-4o",
		TextModel:            "",
		ChatAudioModel:       "gpt-4o-mini-audio-preview",
		TranscriptionModel:   "whisper-1",
		SpeechSynthesisModel: "gpt-4o-mini-tts",
		ImageGenerationModel: "gpt-image-1",
		ImageEditModel:       "dall-e-2",
		Scenarios: TestScenarios{
			TextCompletion:             false,
			SimpleChat:                 true,
			CompletionStream:           true,
			MultiTurnConversation:      true,
			ToolCalls:                  true,
			MultipleToolCalls:          true,
			MultipleToolCallsStreaming: true,
			End2EndToolCalling:         true,
			AutomaticFunctionCall:      true,
			ImageURL:                   true,
			ImageBase64:                true,
			MultipleImages:             true,
			CompleteEnd2End:            true,
			SpeechSynthesis:            true,
			SpeechSynthesisStream:      true,
			Transcription:              true,
			TranscriptionStream:        false,
			Embedding:                  true,
			ImageGeneration:            false,
			ImageGenerationStream:      false,
			ImageEdit:                  true,
			ImageEditStream:            true,
			ImageVariation:             false,
			ImageVariationStream:       false,
			ListModels:                 true,
			BatchCreate:                true,
			BatchList:                  true,
			BatchRetrieve:              true,
			BatchCancel:                true,
			BatchResults:               true,
			FileUpload:                 true,
			FileList:                   true,
			FileRetrieve:               true,
			FileDelete:                 true,
			FileContent:                true,
			ChatAudio:                  true,
		},
		Fallbacks: []schemas.Fallback{
			{Provider: schemas.OpenAI, Model: "gpt-4o-mini"},
		},
		// NOTE(review): presumably makes the Transcription scenario run
		// serially for this provider — confirm against the test runner.
		DisableParallelFor: []string{"Transcription"},
	},
	// Vertex.
	{
		Provider:             schemas.Vertex,
		ChatModel:            "gemini-pro",
		TextModel:            "",
		ImageGenerationModel: "imagen-4.0-generate-001",
		ImageEditModel:       "imagen-4.0-generate-001",
		Scenarios: TestScenarios{
			TextCompletion:             false,
			SimpleChat:                 true,
			CompletionStream:           true,
			MultiTurnConversation:      true,
			ToolCalls:                  true,
			MultipleToolCalls:          true,
			MultipleToolCallsStreaming: true,
			End2EndToolCalling:         true,
			AutomaticFunctionCall:      true,
			ImageURL:                   true,
			ImageBase64:                true,
			MultipleImages:             true,
			CompleteEnd2End:            true,
			ImageGeneration:            true,
			ImageGenerationStream:      false,
			ImageEdit:                  true,
			ImageEditStream:            false,
			ImageVariation:             false,
			ImageVariationStream:       false,
			SpeechSynthesis:            false,
			SpeechSynthesisStream:      false,
			Transcription:              false,
			TranscriptionStream:        false,
			Embedding:                  true,
			ListModels:                 true,
		},
		Fallbacks: []schemas.Fallback{
			{Provider: schemas.OpenAI, Model: "gpt-4o-mini"},
		},
	},
	// Mistral. CompletionStream is not listed here, so it keeps its zero value.
	{
		Provider:           schemas.Mistral,
		ChatModel:          "mistral-large-2411",
		TextModel:          "",
		TranscriptionModel: "voxtral-mini-latest",
		Scenarios: TestScenarios{
			TextCompletion:             false,
			SimpleChat:                 true,
			MultiTurnConversation:      true,
			ToolCalls:                  true,
			MultipleToolCalls:          true,
			MultipleToolCallsStreaming: true,
			End2EndToolCalling:         true,
			AutomaticFunctionCall:      true,
			ImageURL:                   true,
			ImageBase64:                true,
			MultipleImages:             true,
			CompleteEnd2End:            true,
			SpeechSynthesis:            false,
			SpeechSynthesisStream:      false,
			Transcription:              true,
			TranscriptionStream:        true,
			Embedding:                  true,
			ImageGeneration:            false,
			ImageGenerationStream:      false,
			ImageEdit:                  false,
			ImageEditStream:            false,
			ImageVariation:             false,
			ImageVariationStream:       false,
			ListModels:                 true,
		},
		Fallbacks: []schemas.Fallback{
			{Provider: schemas.OpenAI, Model: "gpt-4o-mini"},
		},
	},
	// Ollama.
	{
		Provider:  schemas.Ollama,
		ChatModel: "llama3.2",
		TextModel: "",
		Scenarios: TestScenarios{
			TextCompletion:             false,
			SimpleChat:                 true,
			CompletionStream:           true,
			MultiTurnConversation:      true,
			ToolCalls:                  true,
			MultipleToolCalls:          true,
			MultipleToolCallsStreaming: true,
			End2EndToolCalling:         true,
			AutomaticFunctionCall:      true,
			ImageURL:                   true,
			ImageBase64:                true,
			MultipleImages:             true,
			CompleteEnd2End:            true,
			SpeechSynthesis:            false,
			SpeechSynthesisStream:      false,
			Transcription:              false,
			TranscriptionStream:        false,
			Embedding:                  false,
			ImageGeneration:            false,
			ImageGenerationStream:      false,
			ImageEdit:                  false,
			ImageEditStream:            false,
			ImageVariation:             false,
			ImageVariationStream:       false,
			ListModels:                 true,
		},
		Fallbacks: []schemas.Fallback{
			{Provider: schemas.OpenAI, Model: "gpt-4o-mini"},
		},
	},
	// Groq.
	{
		Provider:  schemas.Groq,
		ChatModel: "llama-3.3-70b-versatile",
		TextModel: "",
		Scenarios: TestScenarios{
			TextCompletion:             false,
			SimpleChat:                 true,
			CompletionStream:           true,
			MultiTurnConversation:      true,
			ToolCalls:                  true,
			MultipleToolCalls:          true,
			MultipleToolCallsStreaming: true,
			End2EndToolCalling:         true,
			AutomaticFunctionCall:      true,
			ImageURL:                   true,
			ImageBase64:                true,
			MultipleImages:             true,
			CompleteEnd2End:            true,
			SpeechSynthesis:            false,
			SpeechSynthesisStream:      false,
			Transcription:              false,
			TranscriptionStream:        false,
			Embedding:                  false,
			ImageGeneration:            false,
			ImageGenerationStream:      false,
			ImageEdit:                  false,
			ImageEditStream:            false,
			ImageVariation:             false,
			ImageVariationStream:       false,
			ListModels:                 true,
		},
		Fallbacks: []schemas.Fallback{
			{Provider: schemas.OpenAI, Model: "gpt-4o-mini"},
		},
	},
	// Fireworks. No Fallbacks are configured for this entry.
	{
		Provider:       schemas.Fireworks,
		ChatModel:      "accounts/fireworks/models/deepseek-v3p2",
		TextModel:      "accounts/fireworks/models/deepseek-v3p2",
		EmbeddingModel: "nomic-ai/nomic-embed-text-v1.5",
		Scenarios: TestScenarios{
			TextCompletion:        true,
			TextCompletionStream:  true,
			SimpleChat:            true,
			CompletionStream:      true,
			MultiTurnConversation: true,
			ToolCalls:             true,
			ToolCallsStreaming:    true,
			MultipleToolCalls:     false,
			End2EndToolCalling:    false,
			AutomaticFunctionCall: false,
			ImageURL:              false,
			ImageBase64:           false,
			MultipleImages:        false,
			FileBase64:            false,
			FileURL:               false,
			CompleteEnd2End:       true,
			Embedding:             true,
			ListModels:            true,
			Reasoning:             false,
			Transcription:         false,
			SpeechSynthesis:       false,
			PromptCaching:         false,
		},
	},
	// Custom OpenAI provider (ProviderOpenAICustom).
	{
		Provider:  ProviderOpenAICustom,
		ChatModel: "llama-3.3-70b-versatile",
		TextModel: "",
		Scenarios: TestScenarios{
			TextCompletion:             false,
			SimpleChat:                 true,
			CompletionStream:           true,
			MultiTurnConversation:      true,
			ToolCalls:                  true,
			MultipleToolCalls:          true,
			MultipleToolCallsStreaming: true,
			End2EndToolCalling:         true,
			AutomaticFunctionCall:      true,
			ImageURL:                   false,
			ImageBase64:                false,
			MultipleImages:             false,
			CompleteEnd2End:            true,
			SpeechSynthesis:            false,
			SpeechSynthesisStream:      false,
			Transcription:              false,
			TranscriptionStream:        false,
			Embedding:                  false,
			ImageGeneration:            false,
			ImageGenerationStream:      false,
			ImageEdit:                  false,
			ImageEditStream:            false,
			ImageVariation:             false,
			ImageVariationStream:       false,
			ListModels:                 true,
		},
		Fallbacks: []schemas.Fallback{
			{Provider: schemas.OpenAI, Model: "gpt-4o-mini"},
		},
	},
	// Gemini.
	{
		Provider:             schemas.Gemini,
		ChatModel:            "gemini-2.0-flash",
		TextModel:            "",
		TranscriptionModel:   "gemini-2.5-flash",
		SpeechSynthesisModel: "gemini-2.5-flash-preview-tts",
		EmbeddingModel:       "gemini-embedding-001",
		ImageGenerationModel: "imagen-4.0-generate-001",
		ImageEditModel:       "imagen-4.0-generate-001",
		Scenarios: TestScenarios{
			TextCompletion:             false,
			SimpleChat:                 true,
			CompletionStream:           true,
			MultiTurnConversation:      true,
			ToolCalls:                  true,
			MultipleToolCalls:          true,
			MultipleToolCallsStreaming: true,
			End2EndToolCalling:         true,
			AutomaticFunctionCall:      true,
			ImageURL:                   true,
			ImageBase64:                true,
			MultipleImages:             true,
			CompleteEnd2End:            true,
			SpeechSynthesis:            true,
			SpeechSynthesisStream:      true,
			Transcription:              true,
			TranscriptionStream:        true,
			Embedding:                  true,
			ImageGeneration:            true,
			ImageGenerationStream:      false,
			ImageEdit:                  true,
			ImageEditStream:            false,
			ImageVariation:             false,
			ImageVariationStream:       false,
			ListModels:                 true,
			BatchCreate:                true,
			BatchList:                  true,
			BatchRetrieve:              true,
			BatchCancel:                true,
			BatchResults:               true,
			FileUpload:                 true,
			FileList:                   true,
			FileRetrieve:               true,
			FileDelete:                 true,
			FileContent:                false,
		},
		Fallbacks: []schemas.Fallback{
			{Provider: schemas.OpenAI, Model: "gpt-4o-mini"},
		},
	},
	// OpenRouter.
	{
		Provider:  schemas.OpenRouter,
		ChatModel: "openai/gpt-4o",
		TextModel: "google/gemini-2.5-flash",
		Scenarios: TestScenarios{
			TextCompletion:             true,
			SimpleChat:                 true,
			CompletionStream:           true,
			MultiTurnConversation:      true,
			ToolCalls:                  true,
			MultipleToolCalls:          true,
			MultipleToolCallsStreaming: true,
			End2EndToolCalling:         true,
			AutomaticFunctionCall:      true,
			ImageURL:                   true,
			ImageBase64:                true,
			MultipleImages:             true,
			CompleteEnd2End:            true,
			ImageGeneration:            false,
			ImageGenerationStream:      false,
			ImageEdit:                  false,
			ImageEditStream:            false,
			ImageVariation:             false,
			ImageVariationStream:       false,
			SpeechSynthesis:            false,
			SpeechSynthesisStream:      false,
			Transcription:              false,
			TranscriptionStream:        false,
			Embedding:                  false,
			ListModels:                 true,
		},
		Fallbacks: []schemas.Fallback{
			{Provider: schemas.OpenAI, Model: "gpt-4o-mini"},
		},
	},
	// HuggingFace. The only entry that sets VisionModel.
	{
		Provider:             schemas.HuggingFace,
		ChatModel:            "groq/openai/gpt-oss-120b",
		VisionModel:          "novita/zai-org/GLM-4.6V-Flash",
		EmbeddingModel:       "sambanova/intfloat/e5-mistral-7b-instruct",
		TranscriptionModel:   "fal-ai/openai/whisper-large-v3",
		SpeechSynthesisModel: "fal-ai/hexgrad/Kokoro-82M",
		ImageGenerationModel: "fal-ai/fal-ai/flux-2",
		ImageEditModel:       "fal-ai/fal-ai/flux-2",
		Scenarios: TestScenarios{
			TextCompletion:        false,
			TextCompletionStream:  false,
			SimpleChat:            true,
			CompletionStream:      true,
			MultiTurnConversation: true,
			ToolCalls:             true,
			ToolCallsStreaming:    true,
			MultipleToolCalls:     false,
			End2EndToolCalling:    true,
			AutomaticFunctionCall: true,
			ImageURL:              true,
			ImageBase64:           true,
			MultipleImages:        true,
			CompleteEnd2End:       true,
			Embedding:             true,
			ImageGeneration:       true,
			ImageGenerationStream: true,
			ImageEdit:             true,
			ImageEditStream:       true,
			ImageVariation:        false,
			ImageVariationStream:  false,
			Transcription:         true,
			TranscriptionStream:   false,
			SpeechSynthesis:       true,
			SpeechSynthesisStream: false,
			Reasoning:             false,
			ListModels:            true,
		},
		Fallbacks: []schemas.Fallback{
			{Provider: schemas.OpenAI, Model: "gpt-4o-mini"},
		},
	},
	// XAI. No Fallbacks are configured for this entry.
	{
		Provider:             schemas.XAI,
		ChatModel:            "grok-4-0709",
		TextModel:            "",
		ImageGenerationModel: "grok-2-image",
		Scenarios: TestScenarios{
			TextCompletion:             false,
			SimpleChat:                 true,
			CompletionStream:           true,
			MultiTurnConversation:      true,
			ToolCalls:                  true,
			MultipleToolCalls:          true,
			MultipleToolCallsStreaming: true,
			End2EndToolCalling:         true,
			AutomaticFunctionCall:      true,
			ImageURL:                   true,
			ImageBase64:                true,
			MultipleImages:             true,
			CompleteEnd2End:            true,
			SpeechSynthesis:            false,
			SpeechSynthesisStream:      false,
			Transcription:              false,
			TranscriptionStream:        false,
			Embedding:                  false,
			ImageGeneration:            true,
			ImageGenerationStream:      false,
			ImageEdit:                  false,
			ImageEditStream:            false,
			ImageVariation:             false,
			ImageVariationStream:       false,
			ListModels:                 true,
		},
	},
	// Replicate. No Fallbacks are configured for this entry.
	{
		Provider:             schemas.Replicate,
		ChatModel:            "openai/gpt-4.1-mini",
		TextModel:            "openai/gpt-4.1-mini",
		ImageGenerationModel: "black-forest-labs/flux-dev",
		Scenarios: TestScenarios{
			TextCompletion:             false,
			SimpleChat:                 true,
			CompletionStream:           true,
			MultiTurnConversation:      true,
			ToolCalls:                  true,
			MultipleToolCalls:          true,
			MultipleToolCallsStreaming: true,
			End2EndToolCalling:         true,
			AutomaticFunctionCall:      true,
			ImageURL:                   true,
			ImageBase64:                true,
			MultipleImages:             true,
			CompleteEnd2End:            true,
			SpeechSynthesis:            false,
			SpeechSynthesisStream:      false,
			Transcription:              false,
			TranscriptionStream:        false,
			Embedding:                  false,
			ListModels:                 true,
			ImageGeneration:            true,
			ImageGenerationStream:      false,
		},
	}, {
		// VLLM.
		Provider:           schemas.VLLM,
		ChatModel:          "Qwen/Qwen3-0.6B",
		TextModel:          "Qwen/Qwen3-0.6B",
		EmbeddingModel:     "Qwen/Qwen3-Embedding-0.6B",
		TranscriptionModel: "openai/whisper-small",
		Scenarios: TestScenarios{
			SpeechSynthesis:            false,
			SpeechSynthesisStream:      false,
			Transcription:              true,
			TranscriptionStream:        true,
			Embedding:                  true,
			ImageGeneration:            false,
			ImageGenerationStream:      false,
			ImageEdit:                  false,
			ImageEditStream:            false,
			ImageVariation:             false,
			ImageVariationStream:       false,
			ListModels:                 true,
			TextCompletion:             true,
			TextCompletionStream:       true,
			SimpleChat:                 true,
			CompletionStream:           true,
			MultiTurnConversation:      true,
			ToolCalls:                  true,
			MultipleToolCalls:          true,
			MultipleToolCallsStreaming: true,
			End2EndToolCalling:         true,
		},
		Fallbacks: []schemas.Fallback{
			{Provider: schemas.OpenAI, Model: "gpt-4o-mini"},
		},
	},
}

AllProviderConfigs contains test configurations for all providers

View Source
// AllowedAudioFormats is the set of audio format names treated as valid for
// speech synthesis. Membership is stored as name -> true, so looking up a
// format that is not listed yields false.
var AllowedAudioFormats = map[string]bool{
	"flac": true,
	"m4a":  true,
	"mp3":  true,
	"mp4":  true,
	"mpeg": true,
	"mpga": true,
	"ogg":  true,
	"wav":  true,
	"webm": true,
}

AllowedAudioFormats defines the set of valid audio formats for speech synthesis

View Source
// CalculatorToolFunction is a chat tool function definition whose parameters
// form an object with a single required string property, "expression".
// Only the Parameters field is populated in this literal.
var CalculatorToolFunction = &schemas.ChatToolFunction{
	Parameters: &schemas.ToolFunctionParameters{
		Type: "object",
		Properties: schemas.NewOrderedMapFromPairs(
			// "expression": the mathematical expression to evaluate.
			schemas.KV("expression", map[string]interface{}{
				"type":        "string",
				"description": "The mathematical expression to evaluate, e.g. '2 + 3' or '10 * 5'",
			}),
		),
		Required: []string{"expression"},
	},
}
View Source
// PingToolFunctionWithEmpty has an explicitly empty OrderedMap for properties
// (built with schemas.NewOrderedMap) and declares no required parameters.
var PingToolFunctionWithEmpty = &schemas.ChatToolFunction{
	Parameters: &schemas.ToolFunctionParameters{
		Type:       "object",
		Properties: schemas.NewOrderedMap(),
	},
}

PingToolFunctionWithEmpty has an explicitly empty OrderedMap for properties

View Source
// PingToolFunctionWithNil has nil properties; per the package documentation
// these get auto-initialized during marshalling.
// NOTE(review): the marshalling code is not visible here — confirm in schemas.
var PingToolFunctionWithNil = &schemas.ChatToolFunction{
	Parameters: &schemas.ToolFunctionParameters{
		Type:       "object",
		Properties: nil, // deliberately nil, in contrast to an empty OrderedMap
	},
}

PingToolFunctionWithNil has nil properties that get auto-initialized during marshalling

View Source
// TimeToolFunction is a chat tool function definition whose parameters form
// an object with a single required string property, "timezone".
// Only the Parameters field is populated in this literal.
var TimeToolFunction = &schemas.ChatToolFunction{
	Parameters: &schemas.ToolFunctionParameters{
		Type: "object",
		Properties: schemas.NewOrderedMapFromPairs(
			// "timezone": a timezone identifier such as 'America/New_York' or 'UTC'.
			schemas.KV("timezone", map[string]interface{}{
				"type":        "string",
				"description": "The timezone identifier, e.g. 'America/New_York' or 'UTC'",
			}),
		),
		Required: []string{"timezone"},
	},
}
View Source
// WeatherToolFunction is a chat tool function definition whose parameters
// form an object with two string properties: "location" (required) and
// "unit" (optional, restricted to "celsius" or "fahrenheit").
// Only the Parameters field is populated in this literal.
var WeatherToolFunction = &schemas.ChatToolFunction{
	Parameters: &schemas.ToolFunctionParameters{
		Type: "object",
		Properties: schemas.NewOrderedMapFromPairs(
			// "location": the city and state to look up.
			schemas.KV("location", map[string]interface{}{
				"type":        "string",
				"description": "The city and state, e.g. San Francisco, CA",
			}),
			// "unit": not listed in Required, so callers may omit it.
			schemas.KV("unit", map[string]interface{}{
				"type": "string",
				"enum": []string{"celsius", "fahrenheit"},
			}),
		),
		Required: []string{"location"},
	},
}

Functions

func AssertNoError

func AssertNoError(t *testing.T, err *schemas.BifrostError, msgAndArgs ...interface{}) bool

AssertNoError is like assert.NoError but with better error formatting

func BasicRerankExpectations

func BasicRerankExpectations(t *testing.T, rerankResponse *schemas.BifrostRerankResponse, documents []schemas.RerankDocument)

BasicRerankExpectations validates common rerank invariants for provider tests.

func CreateAudioChatMessage

func CreateAudioChatMessage(text, audioData string, audioFormat string) schemas.ChatMessage

func CreateBasicChatMessage

func CreateBasicChatMessage(content string) schemas.ChatMessage

Helper functions for creating requests

func CreateBasicResponsesMessage

func CreateBasicResponsesMessage(content string) schemas.ResponsesMessage

func CreateDocumentChatMessage

func CreateDocumentChatMessage(text, documentBase64 string) schemas.ChatMessage

CreateDocumentChatMessage creates a ChatMessage with a PDF document in base64 format

func CreateDocumentResponsesMessage

func CreateDocumentResponsesMessage(text, documentBase64 string) schemas.ResponsesMessage

CreateDocumentResponsesMessage creates a ResponsesMessage with a PDF document in base64 format

func CreateFileURLChatMessage

func CreateFileURLChatMessage(text, fileURL string) schemas.ChatMessage

CreateFileURLChatMessage creates a ChatMessage with a file URL

func CreateFileURLResponsesMessage

func CreateFileURLResponsesMessage(text, fileURL string) schemas.ResponsesMessage

CreateFileURLResponsesMessage creates a ResponsesMessage with a file URL

func CreateImageChatMessage

func CreateImageChatMessage(text, imageURL string) schemas.ChatMessage

func CreateImageResponsesMessage

func CreateImageResponsesMessage(text, imageURL string) schemas.ResponsesMessage

func CreateSpeechRequest

func CreateSpeechRequest(text, voice, format string) *schemas.BifrostSpeechRequest

CreateSpeechRequest creates a basic speech input for testing

func CreateToolChatMessage

func CreateToolChatMessage(content string, toolCallID string) schemas.ChatMessage

func CreateToolResponsesMessage

func CreateToolResponsesMessage(content string, toolCallID string) schemas.ResponsesMessage

func CreateTranscriptionInput

func CreateTranscriptionInput(audioData []byte, language, responseFormat *string) *schemas.BifrostTranscriptionRequest

CreateTranscriptionInput creates a basic transcription input for testing

func DeepCopyBifrostStreamChunk

func DeepCopyBifrostStreamChunk(original *schemas.BifrostStreamChunk) *schemas.BifrostStreamChunk

DeepCopyBifrostStreamChunk creates a deep copy of a BifrostStreamChunk object to avoid pooling issues

func DetectAudioFormat

func DetectAudioFormat(data []byte) string

DetectAudioFormat detects the audio format from the buffer header bytes. Returns the detected format string (mp3, wav, flac, ogg, mp4, m4a, webm) or empty string if unknown.

func FormatError

func FormatError(parsed ParsedError) string

FormatError formats a ParsedError for display

func FormatErrorConcise

func FormatErrorConcise(parsed ParsedError) string

FormatErrorConcise formats a ParsedError in a concise format

func GenerateTTSAudioForTest

func GenerateTTSAudioForTest(ctx context.Context, t *testing.T, client *bifrost.Bifrost, provider schemas.ModelProvider, ttsModel string, text string, voiceType string, format string) ([]byte, string)

GenerateTTSAudioForTest generates real audio using TTS and writes it to a temp file. It returns the audio bytes and the temp file path. The caller’s t will clean the file up.

func GetChatContent

func GetChatContent(response *schemas.BifrostChatResponse) string

GetChatContent returns the string content from a BifrostChatResponse

func GetErrorMessage

func GetErrorMessage(err *schemas.BifrostError) string

func GetLionBase64Image

func GetLionBase64Image() (string, error)

GetLionBase64Image loads and returns the lion base64 image data from file

func GetPromptCachingTools

func GetPromptCachingTools() []schemas.ChatTool

GetPromptCachingTools returns 10 tools for testing prompt caching with tools

func GetProviderDefaultFormat

func GetProviderDefaultFormat(provider schemas.ModelProvider) string

func GetProviderVoice

func GetProviderVoice(provider schemas.ModelProvider, voiceType string) string

GetProviderVoice returns an appropriate voice for the given provider

func GetResponsesContent

func GetResponsesContent(response *schemas.BifrostResponsesResponse) string

GetResponsesContent returns the string content from a BifrostResponsesResponse

func GetResultContent

func GetResultContent(response *schemas.BifrostResponse) string

func GetRetryDelay

func GetRetryDelay(err *schemas.BifrostError, attempt int) int

GetRetryDelay suggests a retry delay based on the error type

func GetSampleAudioBase64

func GetSampleAudioBase64() (string, error)

GetSampleAudioBase64 loads and returns the sample audio file as base64 encoded string

func GetSampleChatTool

func GetSampleChatTool(toolName SampleToolType) *schemas.ChatTool

func GetSampleResponsesTool

func GetSampleResponsesTool(toolName SampleToolType) *schemas.ResponsesTool

func GetTextCompletionContent

func GetTextCompletionContent(response *schemas.BifrostTextCompletionResponse) string

GetTextCompletionContent returns the string content from a BifrostTextCompletionResponse

func IsRetryableError

func IsRetryableError(err *schemas.BifrostError) bool

IsRetryableError determines if an error should trigger a retry

func LogError

func LogError(t *testing.T, err *schemas.BifrostError, context string)

LogError logs a BifrostError in a readable format

func LogErrorConcise

func LogErrorConcise(t *testing.T, err *schemas.BifrostError, context string)

LogErrorConcise logs a BifrostError in a concise format

func ReplicateDirectKeyForListModels added in v1.5.5

func ReplicateDirectKeyForListModels() schemas.Key

func RequireNoError

func RequireNoError(t *testing.T, err *schemas.BifrostError, msgAndArgs ...interface{})

RequireNoError is like require.NoError but with better error formatting. It ALWAYS includes the ❌ prefix in error messages for consistency.

func RunAllComprehensiveTests

func RunAllComprehensiveTests(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunAllComprehensiveTests executes all comprehensive test scenarios for a given configuration

func RunAllOpusReasoningTests

func RunAllOpusReasoningTests(t *testing.T, client *bifrost.Bifrost, ctx context.Context, config OpusReasoningTestConfig)

RunAllOpusReasoningTests runs Opus 4.5 and 4.6 reasoning tests for a given provider

func RunAutomaticFunctionCallingTest

func RunAutomaticFunctionCallingTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunAutomaticFunctionCallingTest executes the automatic function calling test scenario using dual API testing framework

func RunBatchCancelTest

func RunBatchCancelTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunBatchCancelTest tests the batch cancel functionality

func RunBatchCreateTest

func RunBatchCreateTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunBatchCreateTest tests the batch create functionality

func RunBatchListTest

func RunBatchListTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunBatchListTest tests the batch list functionality

func RunBatchResultsTest

func RunBatchResultsTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunBatchResultsTest tests the batch results functionality

func RunBatchRetrieveTest

func RunBatchRetrieveTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunBatchRetrieveTest tests the batch retrieve functionality

func RunBatchUnsupportedTest

func RunBatchUnsupportedTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunBatchUnsupportedTest tests that unsupported providers return appropriate errors

func RunChatAudioStreamTest

func RunChatAudioStreamTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunChatAudioStreamTest executes the chat audio streaming test scenario

func RunChatAudioTest

func RunChatAudioTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunChatAudioTest executes the chat audio test scenario

func RunChatCompletionReasoningTest

func RunChatCompletionReasoningTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunChatCompletionReasoningTest executes the reasoning test scenario to test thinking capabilities via Chat Completions API

func RunChatCompletionStreamTest

func RunChatCompletionStreamTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunChatCompletionStreamTest executes the chat completion stream test scenario

func RunCompactionTest

func RunCompactionTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunCompactionTest tests that context_management with compaction is correctly forwarded through Bifrost via the Responses API.

Because compaction requires a minimum trigger of 50,000 input tokens, this test does NOT trigger actual compaction. Instead it verifies:

  1. The context_management field survives the Bifrost request round-trip
  2. The compact-2026-01-12 beta header is properly sent
  3. The API accepts the request without error (non-streaming + streaming)

func RunCompleteEnd2EndTest

func RunCompleteEnd2EndTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunCompleteEnd2EndTest executes the complete end-to-end test scenario

func RunContainerCreateTest

func RunContainerCreateTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunContainerCreateTest tests the container create functionality

func RunContainerDeleteTest

func RunContainerDeleteTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunContainerDeleteTest tests the container delete functionality

func RunContainerFileContentTest

func RunContainerFileContentTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunContainerFileContentTest tests the container file content functionality

func RunContainerFileCreateTest

func RunContainerFileCreateTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunContainerFileCreateTest tests the container file create functionality

func RunContainerFileDeleteTest

func RunContainerFileDeleteTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunContainerFileDeleteTest tests the container file delete functionality

func RunContainerFileListTest

func RunContainerFileListTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunContainerFileListTest tests the container file list functionality

func RunContainerFileRetrieveTest

func RunContainerFileRetrieveTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunContainerFileRetrieveTest tests the container file retrieve functionality

func RunContainerFileUnsupportedTest

func RunContainerFileUnsupportedTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunContainerFileUnsupportedTest tests that providers correctly return unsupported operation errors for container file operations

func RunContainerListTest

func RunContainerListTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunContainerListTest tests the container list functionality

func RunContainerRetrieveTest

func RunContainerRetrieveTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunContainerRetrieveTest tests the container retrieve functionality

func RunContainerUnsupportedTest

func RunContainerUnsupportedTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunContainerUnsupportedTest tests that providers correctly return unsupported operation errors

func RunCountTokenTest

func RunCountTokenTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunCountTokenTest validates the CountTokens API for the configured provider/model. It sends a simple prompt as Responses messages and asserts token counts and metadata.

func RunCrossProviderConsistencyTest

func RunCrossProviderConsistencyTest(t *testing.T, client *bifrost.Bifrost, ctx *schemas.BifrostContext, config CrossProviderTestConfig, useResponsesAPI bool)

RunCrossProviderConsistencyTest tests same prompt across providers

func RunCrossProviderScenarioTest

func RunCrossProviderScenarioTest(t *testing.T, client *bifrost.Bifrost, ctx *schemas.BifrostContext, config CrossProviderTestConfig, scenario CrossProviderScenario, useResponsesAPI bool)

RunCrossProviderScenarioTest executes a complete scenario

func RunEagerInputStreamingTest added in v1.5.5

func RunEagerInputStreamingTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunEagerInputStreamingTest tests that setting eager_input_streaming: true on a custom tool succeeds end-to-end against the target Anthropic-family provider. Per Table 20 (verified against A overview + B-header), the fine-grained-tool-streaming-2025-05-14 beta is supported on Anthropic, Bedrock, Vertex, and Azure.

The test verifies:

  1. The request is accepted (no upstream 400 — which would indicate the fine-grained-tool-streaming-2025-05-14 beta header wasn't injected or is rejected by the target provider).
  2. The stream produces a tool call with a valid JSON arguments payload.
  3. The response is otherwise well-formed.

This intentionally runs across all four providers (no single-provider gate, unlike RunFastModeTest, which is Opus-4.6-only).

func RunEmbeddingTest

func RunEmbeddingTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunEmbeddingTest executes the embedding test scenario

func RunEnd2EndToolCallingTest

func RunEnd2EndToolCallingTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunEnd2EndToolCallingTest executes the end-to-end tool calling test scenario

func RunFastModeTest

func RunFastModeTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunFastModeTest tests that the fast-mode-2026-02-01 beta header is correctly sent when speed="fast" is specified via ExtraParams.

This test verifies:

  1. The fast-mode beta header is properly injected when speed=fast
  2. The API accepts the request without error
  3. The response is valid

Note: Fast mode is currently only supported on Anthropic (direct API) with Opus 4.6.

func RunFileAndBatchIntegrationTest

func RunFileAndBatchIntegrationTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunFileAndBatchIntegrationTest tests the integration between file upload and batch create

func RunFileBase64ChatCompletionsTest

func RunFileBase64ChatCompletionsTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunFileBase64ChatCompletionsTest executes the file base64 test using Chat Completions API

func RunFileBase64ResponsesTest

func RunFileBase64ResponsesTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunFileBase64ResponsesTest executes the file base64 test using Responses API

func RunFileBase64Test

func RunFileBase64Test(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunFileBase64Test executes the PDF file input test scenario with separate subtests for each API

func RunFileContentTest

func RunFileContentTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunFileContentTest tests the file content download functionality

func RunFileDeleteTest

func RunFileDeleteTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunFileDeleteTest tests the file delete functionality

func RunFileListTest

func RunFileListTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunFileListTest tests the file list functionality

func RunFileRetrieveTest

func RunFileRetrieveTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunFileRetrieveTest tests the file retrieve functionality

func RunFileURLChatCompletionsTest

func RunFileURLChatCompletionsTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunFileURLChatCompletionsTest executes the file URL test using Chat Completions API

func RunFileURLResponsesTest

func RunFileURLResponsesTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunFileURLResponsesTest executes the file URL test using Responses API

func RunFileURLTest

func RunFileURLTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunFileURLTest executes the file URL input test scenario with separate subtests for each API

func RunFileUnsupportedTest

func RunFileUnsupportedTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunFileUnsupportedTest tests that unsupported providers return appropriate errors for file operations

func RunFileUploadTest

func RunFileUploadTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunFileUploadTest tests the file upload functionality

func RunImageBase64Test

func RunImageBase64Test(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunImageBase64Test executes the image base64 test scenario using dual API testing framework

func RunImageEditStreamTest

func RunImageEditStreamTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunImageEditStreamTest executes the end-to-end streaming image edit test

func RunImageEditTest

func RunImageEditTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunImageEditTest executes the end-to-end image edit test (non-streaming)

func RunImageGenerationStreamTest

func RunImageGenerationStreamTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunImageGenerationStreamTest executes the end-to-end streaming image generation test

func RunImageGenerationTest

func RunImageGenerationTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunImageGenerationTest executes the end-to-end image generation test (non-streaming)

func RunImageURLTest

func RunImageURLTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunImageURLTest executes the image URL test scenario using dual API testing framework

func RunImageVariationStreamTest

func RunImageVariationStreamTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunImageVariationStreamTest executes the end-to-end streaming image variation test. Note: Currently, streaming image variation is not supported by any provider.

func RunImageVariationTest

func RunImageVariationTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunImageVariationTest executes the end-to-end image variation test (non-streaming)

func RunInterleavedThinkingTest

func RunInterleavedThinkingTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunInterleavedThinkingTest tests that the interleaved-thinking-2025-05-14 beta header is correctly sent and that thinking works alongside tool calls.

This test verifies:

  1. The interleaved-thinking beta header is properly injected when thinking is enabled
  2. The API accepts the request with thinking + tools without error
  3. The response contains reasoning content

func RunListModelsErrorMarshalTest

func RunListModelsErrorMarshalTest(t *testing.T, _ *bifrost.Bifrost, _ context.Context, testConfig ComprehensiveTestConfig)

RunListModelsErrorMarshalTest verifies that the KeyStatus ↔ BifrostError circular reference pattern used by HandleMultipleListModelsRequests and HandleKeylessListModelsRequest marshals without cycle errors.

func RunListModelsPaginationTest

func RunListModelsPaginationTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunListModelsPaginationTest executes pagination test for list models

func RunListModelsResponseMarshalTest

func RunListModelsResponseMarshalTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunListModelsResponseMarshalTest verifies that a successful ListModels response (including KeyStatuses) can be marshaled to JSON without cycle errors.

func RunListModelsTest

func RunListModelsTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunListModelsTest executes the list models test scenario

func RunMultiTurnConversationTest

func RunMultiTurnConversationTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunMultiTurnConversationTest executes the multi-turn conversation test scenario

func RunMultiTurnReasoningTest

func RunMultiTurnReasoningTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunMultiTurnReasoningTest tests multi-turn conversations with reasoning content passthrough. It verifies that reasoning details (text + signature) from assistant messages are correctly passed back to the model in follow-up turns via the Chat Completions API.

func RunMultipleImagesTest

func RunMultipleImagesTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunMultipleImagesTest executes the multiple images test scenario

func RunMultipleToolCallsTest

func RunMultipleToolCallsTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunMultipleToolCallsTest executes the multiple tool calls test scenario using dual API testing framework

func RunOpus45ReasoningTest

func RunOpus45ReasoningTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, config OpusReasoningTestConfig)

RunOpus45ReasoningTest tests extended thinking with Opus 4.5 (budget_tokens mode)

func RunOpus46MultiTurnReasoningTest

func RunOpus46MultiTurnReasoningTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, config OpusReasoningTestConfig)

RunOpus46MultiTurnReasoningTest tests multi-turn conversations with reasoning content passthrough. This verifies that reasoning details (text + signature) from assistant messages are correctly passed back to the model in follow-up turns.

func RunOpus46ReasoningTest

func RunOpus46ReasoningTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, config OpusReasoningTestConfig)

RunOpus46ReasoningTest tests adaptive thinking with Opus 4.6 (adaptive mode + effort)

func RunPassthroughAPITest

func RunPassthroughAPITest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunPassthroughAPITest exercises Bifrost's raw HTTP passthrough API for the configured provider using two sub-tests:

  • PassthroughAPI/NonStream – calls client.Passthrough and verifies a 2xx response with a non-empty body and correct ExtraFields.
  • PassthroughAPI/Stream – calls client.PassthroughStream and verifies that at least one chunk with body data is received.

The test is skipped when Scenarios.PassthroughAPI is false or the provider's native request format is not yet covered by buildPassthroughChatReq.

func RunPassthroughExtraParamsTest

func RunPassthroughExtraParamsTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunPassthroughExtraParamsTest executes the passthrough extraParams test scenario. This test verifies that extraParams are properly propagated into the provider request body when the passthrough flag is set in the context. Note: This test only runs for providers that support arbitrary extra params at the root level of the request body. Providers like Anthropic have strict schema validation and don't accept unknown fields, so they should set PassThroughExtraParams: false in their test config.

func RunPromptCachingMultiTurnTest

func RunPromptCachingMultiTurnTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunPromptCachingMultiTurnTest verifies prompt caching across a 10-turn multi-turn conversation. Each turn appends the assistant's previous response and a new user message, while keeping the system message and tools constant. The system prefix + tools form the cached prefix; turns 2+ should show cached_read_tokens > 0, proving caching is intact.

func RunPromptCachingMultipleToolCallsTest

func RunPromptCachingMultipleToolCallsTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunPromptCachingMultipleToolCallsTest verifies prompt caching across a 10-turn conversation with tool calls scattered throughout. This directly reproduces the Vertex caching bug where Bifrost's key reordering in tool_use input fields caused the cache prefix to diverge at the first tool_use block.

The conversation grows from ~9 messages (turn 1) to ~19 messages (turn 5), matching the real-world Claude Code pattern (11, 13, 15 messages across turns). Tool calls use DIFFERENT key orderings per block to test key order preservation.

Each turn verifies:

  1. The response succeeds and has content
  2. cache_read_input_tokens grows across turns (proving prefix stability)
  3. For Anthropic/Vertex: cache_control markers survive in raw request
  4. For Anthropic/Vertex: tool_use input key ordering is preserved in raw request

func RunPromptCachingTest

func RunPromptCachingTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunPromptCachingTest executes the prompt caching test scenario. This test verifies that OpenAI's prompt caching works correctly with tools by making multiple requests with the same long prefix and tools, and verifying that cached tokens increase in subsequent requests.

func RunPromptCachingToolBlocksTest

func RunPromptCachingToolBlocksTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunPromptCachingToolBlocksTest validates that cache_control on tool_use and tool_result content blocks survives the Bifrost round-trip (Anthropic format -> Bifrost ResponsesMessage -> Provider format). It sends a Responses API request with cache_control on function_call and function_call_output messages, enables raw request capture, and inspects the outgoing provider request to verify cache markers are present.

For Anthropic/Vertex: verifies "cache_control" appears on tool_use and tool_result content blocks. For Bedrock: verifies "cachePoint" blocks appear after toolUse and toolResult blocks.

func RunRealtimeTest

func RunRealtimeTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunRealtimeTest dials the provider's native Realtime WebSocket endpoint, sends a text-based conversation turn, and validates the session + response events.

func RunRerankTest

func RunRerankTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunRerankTest executes the rerank test scenario

func RunResponsesReasoningTest

func RunResponsesReasoningTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunResponsesReasoningTest executes the reasoning test scenario to test thinking capabilities via Responses API only

func RunResponsesStreamTest

func RunResponsesStreamTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunResponsesStreamTest executes the responses streaming test scenario

func RunServerToolsViaOpenAIEndpointTest added in v1.5.5

func RunServerToolsViaOpenAIEndpointTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunServerToolsViaOpenAIEndpointTest reproduces the user-reported bug where an Anthropic-server-tool-shaped entry sent in tools[] via the OpenAI-compatible chat-completions endpoint was silently dropped (Claude responded with a prose "I can't check real-time data" fallback). The fix was a combination of:

  • ChatTool schema gaining Name + all server-tool variant fields.
  • ToAnthropicChatRequest learning to convert non-function tools (server tools) into AnthropicTool with the correct variant embed.

This test sends the exact curl-reported shape via BifrostChatRequest + ChatCompletionRequest and asserts the request succeeds end-to-end against the provider. It covers three server tools that have single-turn triggers (web_search, web_fetch, code_execution) across all supporting providers per Table 20. Other variants (bash, memory, text_editor, tool_search, mcp_toolset, computer_use) require multi-turn tool loops or infra setup and are covered by the schema / unit-level round-trip tests instead.

func RunSimpleChatTest

func RunSimpleChatTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunSimpleChatTest executes the simple chat test scenario using dual API testing framework

func RunSpeechSynthesisAdvancedTest

func RunSpeechSynthesisAdvancedTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunSpeechSynthesisAdvancedTest executes advanced speech synthesis test scenarios

func RunSpeechSynthesisStreamAdvancedTest

func RunSpeechSynthesisStreamAdvancedTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunSpeechSynthesisStreamAdvancedTest executes advanced streaming speech synthesis test scenarios

func RunSpeechSynthesisStreamTest

func RunSpeechSynthesisStreamTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunSpeechSynthesisStreamTest executes the streaming speech synthesis test scenario

func RunSpeechSynthesisTest

func RunSpeechSynthesisTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunSpeechSynthesisTest executes the speech synthesis test scenario

func RunStreamErrorStatusCodeTest

func RunStreamErrorStatusCodeTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunStreamErrorStatusCodeTest validates that pre-stream errors from providers carry the correct HTTP status code in BifrostError.StatusCode. This is critical because the HTTP transport layer (sendStreamError) relies on this field to propagate the provider's actual status code to clients, rather than always returning 200 OK.

The test sends a streaming request with a deliberately invalid model name. All providers (OpenAI, Anthropic, Bedrock) return 4xx status codes for such errors, and Bifrost must preserve those codes through the error chain.

func RunStructuredOutputChatStreamTest

func RunStructuredOutputChatStreamTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunStructuredOutputChatStreamTest tests structured outputs with Chat Completions API (streaming)

func RunStructuredOutputChatTest

func RunStructuredOutputChatTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunStructuredOutputChatTest tests structured outputs with Chat Completions API (non-streaming)

func RunStructuredOutputResponsesStreamTest

func RunStructuredOutputResponsesStreamTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunStructuredOutputResponsesStreamTest tests structured outputs with Responses API (streaming)

func RunStructuredOutputResponsesTest

func RunStructuredOutputResponsesTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunStructuredOutputResponsesTest tests structured outputs with Responses API (non-streaming)

func RunTextCompletionStreamTest

func RunTextCompletionStreamTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunTextCompletionStreamTest executes the text completion streaming test scenario

func RunTextCompletionTest

func RunTextCompletionTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunTextCompletionTest tests text completion functionality

func RunToolCallsStreamingTest

func RunToolCallsStreamingTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunToolCallsStreamingTest executes the tool calls streaming test scenario

func RunToolCallsTest

func RunToolCallsTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunToolCallsTest executes the tool calls test scenario using dual API testing framework

func RunToolCallsWithEmptyPropertiesTest

func RunToolCallsWithEmptyPropertiesTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunToolCallsWithEmptyPropertiesTest tests tool calls with explicitly empty properties ({})

func RunToolCallsWithNilPropertiesTest

func RunToolCallsWithNilPropertiesTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunToolCallsWithNilPropertiesTest tests tool calls with nil properties (not defined)

func RunTranscriptionAdvancedTest

func RunTranscriptionAdvancedTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunTranscriptionAdvancedTest executes advanced transcription test scenarios

func RunTranscriptionStreamAdvancedTest

func RunTranscriptionStreamAdvancedTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunTranscriptionStreamAdvancedTest executes advanced streaming transcription test scenarios

func RunTranscriptionStreamTest

func RunTranscriptionStreamTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunTranscriptionStreamTest executes the streaming transcription test scenario

func RunTranscriptionTest

func RunTranscriptionTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunTranscriptionTest executes the transcription test scenario

func RunVideoDeleteTest

func RunVideoDeleteTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

func RunVideoDownloadTest

func RunVideoDownloadTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

func RunVideoGenerationTest

func RunVideoGenerationTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

func RunVideoListTest

func RunVideoListTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

func RunVideoRemixTest

func RunVideoRemixTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

func RunVideoRetrieveTest

func RunVideoRetrieveTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

func RunVideoUnsupportedTest

func RunVideoUnsupportedTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

func RunWebSearchToolContextSizesTest

func RunWebSearchToolContextSizesTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunWebSearchToolContextSizesTest tests different search context sizes

func RunWebSearchToolMaxUsesTest

func RunWebSearchToolMaxUsesTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunWebSearchToolMaxUsesTest tests Anthropic-specific max uses parameter

func RunWebSearchToolMultiTurnTest

func RunWebSearchToolMultiTurnTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunWebSearchToolMultiTurnTest tests multi-turn conversation with web search

func RunWebSearchToolStreamTest

func RunWebSearchToolStreamTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunWebSearchToolStreamTest executes streaming web search test

func RunWebSearchToolTest

func RunWebSearchToolTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunWebSearchToolTest verifies that the web search tool is properly invoked and returns results

func RunWebSearchToolWithDomainsTest

func RunWebSearchToolWithDomainsTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunWebSearchToolWithDomainsTest tests web search with domain filtering

func RunWebSocketResponsesTest

func RunWebSocketResponsesTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig)

RunWebSocketResponsesTest dials the provider's native WebSocket Responses endpoint, sends a response.create event, and validates the streaming events that come back.

func SaveAndValidateAudio

func SaveAndValidateAudio(t *testing.T, audioData []byte) (string, error)

SaveAndValidateAudio saves audio bytes to a temp file, validates it, and registers cleanup. It auto-detects the audio format from magic bytes and validates it's one of the allowed formats. Returns the temp file path for logging purposes.

func SetupTest

SetupTest initializes a test environment with timeout context

func ShouldRunParallel

func ShouldRunParallel(t *testing.T, testConfig ComprehensiveTestConfig, scenario string)

ShouldRunParallel checks if a test should run in parallel based on environment variables and provider-specific configuration. It marks the test as parallel if parallel execution is allowed for this scenario.

Parameters:

  • t: the testing.T instance
  • testConfig: the comprehensive test config containing DisableParallelFor settings
  • scenario: the test scenario name (e.g., "Transcription", "SpeechSynthesis")

func ValidateAudioBytes

func ValidateAudioBytes(t *testing.T, audioData []byte, expectedFormat string) error

ValidateAudioBytes validates audio bytes by checking magic bytes and attempting decode

func ValidateAudioFile

func ValidateAudioFile(t *testing.T, filePath string, expectedFormat string) error

ValidateAudioFile validates an audio file by checking magic bytes and attempting decode

func ValidateExtraFieldsRaw

func ValidateExtraFieldsRaw(extraFields schemas.BifrostResponseExtraFields) []error

ValidateExtraFieldsRaw validates rawRequest and rawResponse on BifrostResponseExtraFields

func ValidateRawField

func ValidateRawField(field interface{}, fieldName string) error

ValidateRawField checks that a raw request/response field is: (1) non-nil, (2) valid JSON (parseable), and (3) compact JSON (no unnecessary whitespace). It returns an error describing the validation failure, or nil if valid.

func ValidateWebSearchSources

func ValidateWebSearchSources(t *testing.T, sources []schemas.ResponsesWebSearchToolCallActionSearchSource, allowedDomains []string)

ValidateWebSearchSources validates web search sources structure and domain filtering

func WithBatchCancelTestRetry

func WithBatchCancelTestRetry(
	t *testing.T,
	config BatchCancelRetryConfig,
	context TestRetryContext,
	expectations ResponseExpectations,
	scenarioName string,
	operation func() (*schemas.BifrostBatchCancelResponse, *schemas.BifrostError),
) (*schemas.BifrostBatchCancelResponse, *schemas.BifrostError)

WithBatchCancelTestRetry wraps a batch cancel test operation with retry logic

func WithBatchCreateTestRetry

func WithBatchCreateTestRetry(
	t *testing.T,
	config BatchCreateRetryConfig,
	context TestRetryContext,
	expectations ResponseExpectations,
	scenarioName string,
	operation func() (*schemas.BifrostBatchCreateResponse, *schemas.BifrostError),
) (*schemas.BifrostBatchCreateResponse, *schemas.BifrostError)

WithBatchCreateTestRetry wraps a batch create test operation with retry logic

func WithBatchListTestRetry

func WithBatchListTestRetry(
	t *testing.T,
	config BatchListRetryConfig,
	context TestRetryContext,
	expectations ResponseExpectations,
	scenarioName string,
	operation func() (*schemas.BifrostBatchListResponse, *schemas.BifrostError),
) (*schemas.BifrostBatchListResponse, *schemas.BifrostError)

WithBatchListTestRetry wraps a batch list test operation with retry logic

func WithBatchResultsTestRetry

func WithBatchResultsTestRetry(
	t *testing.T,
	config BatchResultsRetryConfig,
	context TestRetryContext,
	expectations ResponseExpectations,
	scenarioName string,
	operation func() (*schemas.BifrostBatchResultsResponse, *schemas.BifrostError),
) (*schemas.BifrostBatchResultsResponse, *schemas.BifrostError)

WithBatchResultsTestRetry wraps a batch results test operation with retry logic

func WithBatchRetrieveTestRetry

func WithBatchRetrieveTestRetry(
	t *testing.T,
	config BatchRetrieveRetryConfig,
	context TestRetryContext,
	expectations ResponseExpectations,
	scenarioName string,
	operation func() (*schemas.BifrostBatchRetrieveResponse, *schemas.BifrostError),
) (*schemas.BifrostBatchRetrieveResponse, *schemas.BifrostError)

WithBatchRetrieveTestRetry wraps a batch retrieve test operation with retry logic

func WithChatTestRetry

func WithChatTestRetry(
	t *testing.T,
	config ChatRetryConfig,
	context TestRetryContext,
	expectations ResponseExpectations,
	scenarioName string,
	operation func() (*schemas.BifrostChatResponse, *schemas.BifrostError),
) (*schemas.BifrostChatResponse, *schemas.BifrostError)

WithChatTestRetry wraps a chat test operation with retry logic for LLM behavior inconsistencies

func WithCountTokensTestRetry

func WithCountTokensTestRetry(
	t *testing.T,
	config CountTokensRetryConfig,
	context TestRetryContext,
	expectations ResponseExpectations,
	scenarioName string,
	operation func() (*schemas.BifrostCountTokensResponse, *schemas.BifrostError),
) (*schemas.BifrostCountTokensResponse, *schemas.BifrostError)

WithCountTokensTestRetry wraps a count tokens test operation with retry logic for LLM behavior inconsistencies

func WithEmbeddingTestRetry

func WithEmbeddingTestRetry(
	t *testing.T,
	config EmbeddingRetryConfig,
	context TestRetryContext,
	expectations ResponseExpectations,
	scenarioName string,
	operation func() (*schemas.BifrostEmbeddingResponse, *schemas.BifrostError),
) (*schemas.BifrostEmbeddingResponse, *schemas.BifrostError)

WithEmbeddingTestRetry wraps an embedding test operation with retry logic for LLM behavior inconsistencies

func WithFileContentTestRetry

func WithFileContentTestRetry(
	t *testing.T,
	config FileContentRetryConfig,
	context TestRetryContext,
	expectations ResponseExpectations,
	scenarioName string,
	operation func() (*schemas.BifrostFileContentResponse, *schemas.BifrostError),
) (*schemas.BifrostFileContentResponse, *schemas.BifrostError)

WithFileContentTestRetry wraps a file content test operation with retry logic

func WithFileDeleteTestRetry

func WithFileDeleteTestRetry(
	t *testing.T,
	config FileDeleteRetryConfig,
	context TestRetryContext,
	expectations ResponseExpectations,
	scenarioName string,
	operation func() (*schemas.BifrostFileDeleteResponse, *schemas.BifrostError),
) (*schemas.BifrostFileDeleteResponse, *schemas.BifrostError)

WithFileDeleteTestRetry wraps a file delete test operation with retry logic

func WithFileListTestRetry

func WithFileListTestRetry(
	t *testing.T,
	config FileListRetryConfig,
	context TestRetryContext,
	expectations ResponseExpectations,
	scenarioName string,
	operation func() (*schemas.BifrostFileListResponse, *schemas.BifrostError),
) (*schemas.BifrostFileListResponse, *schemas.BifrostError)

WithFileListTestRetry wraps a file list test operation with retry logic

func WithFileRetrieveTestRetry

func WithFileRetrieveTestRetry(
	t *testing.T,
	config FileRetrieveRetryConfig,
	context TestRetryContext,
	expectations ResponseExpectations,
	scenarioName string,
	operation func() (*schemas.BifrostFileRetrieveResponse, *schemas.BifrostError),
) (*schemas.BifrostFileRetrieveResponse, *schemas.BifrostError)

WithFileRetrieveTestRetry wraps a file retrieve test operation with retry logic

func WithFileUploadTestRetry

func WithFileUploadTestRetry(
	t *testing.T,
	config FileUploadRetryConfig,
	context TestRetryContext,
	expectations ResponseExpectations,
	scenarioName string,
	operation func() (*schemas.BifrostFileUploadResponse, *schemas.BifrostError),
) (*schemas.BifrostFileUploadResponse, *schemas.BifrostError)

WithFileUploadTestRetry wraps a file upload test operation with retry logic

func WithListModelsTestRetry

func WithListModelsTestRetry(
	t *testing.T,
	config ListModelsRetryConfig,
	context TestRetryContext,
	expectations ResponseExpectations,
	scenarioName string,
	operation func() (*schemas.BifrostListModelsResponse, *schemas.BifrostError),
) (*schemas.BifrostListModelsResponse, *schemas.BifrostError)

WithListModelsTestRetry wraps a list models test operation with retry logic. IMPORTANT: it ALWAYS retries on ANY failure condition (errors, nil response, empty data, validation failures). This ensures maximum resilience for list models tests.

func WithResponsesTestRetry

func WithResponsesTestRetry(
	t *testing.T,
	config ResponsesRetryConfig,
	context TestRetryContext,
	expectations ResponseExpectations,
	scenarioName string,
	operation func() (*schemas.BifrostResponsesResponse, *schemas.BifrostError),
) (*schemas.BifrostResponsesResponse, *schemas.BifrostError)

WithResponsesTestRetry wraps a Responses API test operation with retry logic for LLM behavior inconsistencies

func WithSpeechTestRetry

func WithSpeechTestRetry(
	t *testing.T,
	config SpeechRetryConfig,
	context TestRetryContext,
	expectations ResponseExpectations,
	scenarioName string,
	operation func() (*schemas.BifrostSpeechResponse, *schemas.BifrostError),
) (*schemas.BifrostSpeechResponse, *schemas.BifrostError)

WithSpeechTestRetry wraps a speech test operation with retry logic for LLM behavior inconsistencies

func WithStreamRetry

func WithStreamRetry(
	t *testing.T,
	config TestRetryConfig,
	context TestRetryContext,
	operation func() (chan *schemas.BifrostStreamChunk, *schemas.BifrostError),
) (chan *schemas.BifrostStreamChunk, *schemas.BifrostError)

WithStreamRetry wraps a streaming operation with retry logic for LLM behavioral inconsistencies

func WithTextCompletionTestRetry

func WithTextCompletionTestRetry(
	t *testing.T,
	config TextCompletionRetryConfig,
	context TestRetryContext,
	expectations ResponseExpectations,
	scenarioName string,
	operation func() (*schemas.BifrostTextCompletionResponse, *schemas.BifrostError),
) (*schemas.BifrostTextCompletionResponse, *schemas.BifrostError)

WithTextCompletionTestRetry wraps a text completion test operation with retry logic for LLM behavior inconsistencies

func WithTranscriptionTestRetry

func WithTranscriptionTestRetry(
	t *testing.T,
	config TranscriptionRetryConfig,
	context TestRetryContext,
	expectations ResponseExpectations,
	scenarioName string,
	operation func() (*schemas.BifrostTranscriptionResponse, *schemas.BifrostError),
) (*schemas.BifrostTranscriptionResponse, *schemas.BifrostError)

WithTranscriptionTestRetry wraps a transcription test operation with retry logic for LLM behavior inconsistencies

Types

type AudioValidationResult

type AudioValidationResult struct {
	Valid           bool
	Format          string
	MagicBytesValid bool
	DecodeValid     bool
	FileSize        int64
	Errors          []string
}

AudioValidationResult contains the results of audio validation

type BatchCancelRetryCondition

type BatchCancelRetryCondition interface {
	ShouldRetry(response *schemas.BifrostBatchCancelResponse, err *schemas.BifrostError, context TestRetryContext) (bool, string)
	GetConditionName() string
}

BatchCancelRetryCondition defines an interface for checking if a batch cancel test operation should be retried

type BatchCancelRetryConfig

type BatchCancelRetryConfig struct {
	MaxAttempts int                                              // Maximum retry attempts (including initial attempt)
	BaseDelay   time.Duration                                    // Base delay between retries
	MaxDelay    time.Duration                                    // Maximum delay between retries
	Conditions  []BatchCancelRetryCondition                      // Conditions that trigger retries
	OnRetry     func(attempt int, reason string, t *testing.T)   // Called before each retry
	OnFinalFail func(attempts int, finalErr error, t *testing.T) // Called on final failure
}

BatchCancelRetryConfig configures retry behavior for batch cancel test scenarios

type BatchCreateRetryCondition

type BatchCreateRetryCondition interface {
	ShouldRetry(response *schemas.BifrostBatchCreateResponse, err *schemas.BifrostError, context TestRetryContext) (bool, string)
	GetConditionName() string
}

BatchCreateRetryCondition defines an interface for checking if a batch create test operation should be retried

type BatchCreateRetryConfig

type BatchCreateRetryConfig struct {
	MaxAttempts int                                              // Maximum retry attempts (including initial attempt)
	BaseDelay   time.Duration                                    // Base delay between retries
	MaxDelay    time.Duration                                    // Maximum delay between retries
	Conditions  []BatchCreateRetryCondition                      // Conditions that trigger retries
	OnRetry     func(attempt int, reason string, t *testing.T)   // Called before each retry
	OnFinalFail func(attempts int, finalErr error, t *testing.T) // Called on final failure
}

BatchCreateRetryConfig configures retry behavior for batch create test scenarios

type BatchListRetryCondition

type BatchListRetryCondition interface {
	ShouldRetry(response *schemas.BifrostBatchListResponse, err *schemas.BifrostError, context TestRetryContext) (bool, string)
	GetConditionName() string
}

BatchListRetryCondition defines an interface for checking if a batch list test operation should be retried

type BatchListRetryConfig

type BatchListRetryConfig struct {
	MaxAttempts int                                              // Maximum retry attempts (including initial attempt)
	BaseDelay   time.Duration                                    // Base delay between retries
	MaxDelay    time.Duration                                    // Maximum delay between retries
	Conditions  []BatchListRetryCondition                        // Conditions that trigger retries
	OnRetry     func(attempt int, reason string, t *testing.T)   // Called before each retry
	OnFinalFail func(attempts int, finalErr error, t *testing.T) // Called on final failure
}

BatchListRetryConfig configures retry behavior for batch list test scenarios

type BatchResultsRetryCondition

type BatchResultsRetryCondition interface {
	ShouldRetry(response *schemas.BifrostBatchResultsResponse, err *schemas.BifrostError, context TestRetryContext) (bool, string)
	GetConditionName() string
}

BatchResultsRetryCondition defines an interface for checking if a batch results test operation should be retried

type BatchResultsRetryConfig

type BatchResultsRetryConfig struct {
	MaxAttempts int                                              // Maximum retry attempts (including initial attempt)
	BaseDelay   time.Duration                                    // Base delay between retries
	MaxDelay    time.Duration                                    // Maximum delay between retries
	Conditions  []BatchResultsRetryCondition                     // Conditions that trigger retries
	OnRetry     func(attempt int, reason string, t *testing.T)   // Called before each retry
	OnFinalFail func(attempts int, finalErr error, t *testing.T) // Called on final failure
}

BatchResultsRetryConfig configures retry behavior for batch results test scenarios

type BatchRetrieveRetryCondition

type BatchRetrieveRetryCondition interface {
	ShouldRetry(response *schemas.BifrostBatchRetrieveResponse, err *schemas.BifrostError, context TestRetryContext) (bool, string)
	GetConditionName() string
}

BatchRetrieveRetryCondition defines an interface for checking if a batch retrieve test operation should be retried

type BatchRetrieveRetryConfig

type BatchRetrieveRetryConfig struct {
	MaxAttempts int                                              // Maximum retry attempts (including initial attempt)
	BaseDelay   time.Duration                                    // Base delay between retries
	MaxDelay    time.Duration                                    // Maximum delay between retries
	Conditions  []BatchRetrieveRetryCondition                    // Conditions that trigger retries
	OnRetry     func(attempt int, reason string, t *testing.T)   // Called before each retry
	OnFinalFail func(attempts int, finalErr error, t *testing.T) // Called on final failure
}

BatchRetrieveRetryConfig configures retry behavior for batch retrieve test scenarios

type ChatRetryCondition

type ChatRetryCondition interface {
	ShouldRetry(response *schemas.BifrostChatResponse, err *schemas.BifrostError, context TestRetryContext) (bool, string)
	GetConditionName() string
}

ChatRetryCondition defines an interface for checking if a chat test operation should be retried

type ChatRetryConfig

type ChatRetryConfig struct {
	MaxAttempts int                                              // Maximum retry attempts (including initial attempt)
	BaseDelay   time.Duration                                    // Base delay between retries
	MaxDelay    time.Duration                                    // Maximum delay between retries
	Conditions  []ChatRetryCondition                             // Conditions that trigger retries
	OnRetry     func(attempt int, reason string, t *testing.T)   // Called before each retry
	OnFinalFail func(attempts int, finalErr error, t *testing.T) // Called on final failure
}

ChatRetryConfig configures retry behavior for chat test scenarios

type ChatStreamValidationResult

type ChatStreamValidationResult struct {
	Passed           bool
	Errors           []string
	ReceivedData     bool
	StreamErrors     []string
	ToolCallDetected bool
	ResponseCount    int
}

ChatStreamValidationResult represents the result of chat streaming validation

func WithChatStreamValidationRetry

func WithChatStreamValidationRetry(
	t *testing.T,
	config TestRetryConfig,
	context TestRetryContext,
	operation func() (chan *schemas.BifrostStreamChunk, *schemas.BifrostError),
	validateStream func(chan *schemas.BifrostStreamChunk) ChatStreamValidationResult,
) ChatStreamValidationResult

WithChatStreamValidationRetry wraps a chat streaming operation with retry logic that includes stream content validation. It wraps the entire operation (request + stream reading + validation) and retries on validation failures.

type ComprehensiveTestAccount

type ComprehensiveTestAccount struct{}

ComprehensiveTestAccount provides a test implementation of the Account interface for comprehensive testing.

func (*ComprehensiveTestAccount) GetConfigForProvider

func (account *ComprehensiveTestAccount) GetConfigForProvider(providerKey schemas.ModelProvider) (*schemas.ProviderConfig, error)

GetConfigForProvider returns the configuration settings for a given provider.

func (*ComprehensiveTestAccount) GetConfiguredProviders

func (account *ComprehensiveTestAccount) GetConfiguredProviders() ([]schemas.ModelProvider, error)

GetConfiguredProviders returns the list of initially supported providers.

func (*ComprehensiveTestAccount) GetKeysForProvider

func (account *ComprehensiveTestAccount) GetKeysForProvider(ctx context.Context, providerKey schemas.ModelProvider) ([]schemas.Key, error)

GetKeysForProvider returns the API keys and associated models for a given provider.

type ComprehensiveTestConfig

type ComprehensiveTestConfig struct {
	Provider                 schemas.ModelProvider
	TextModel                string
	ChatModel                string
	PromptCachingModel       string
	VisionModel              string
	ReasoningModel           string
	EmbeddingModel           string
	RerankModel              string
	TranscriptionModel       string
	SpeechSynthesisModel     string
	ChatAudioModel           string
	Scenarios                TestScenarios
	Fallbacks                []schemas.Fallback     // for chat, responses, image and reasoning tests
	TextCompletionFallbacks  []schemas.Fallback     // for text completion tests
	TranscriptionFallbacks   []schemas.Fallback     // for transcription tests
	SpeechSynthesisFallbacks []schemas.Fallback     // for speech synthesis tests
	EmbeddingFallbacks       []schemas.Fallback     // for embedding tests
	RerankFallbacks          []schemas.Fallback     // for rerank tests
	SkipReason               string                 // Reason to skip certain tests
	ImageGenerationModel     string                 // Model for image generation
	ImageGenerationFallbacks []schemas.Fallback     // Fallbacks for image generation
	ImageEditModel           string                 // Model for image editing
	ImageEditFallbacks       []schemas.Fallback     // Fallbacks for image editing
	ImageVariationModel      string                 // Model for image variation
	ImageVariationFallbacks  []schemas.Fallback     // Fallbacks for image variation
	VideoGenerationModel     string                 // Model for video generation
	ExternalTTSProvider      schemas.ModelProvider  // External TTS provider to use for testing
	ExternalTTSModel         string                 // External TTS model to use for testing
	BatchExtraParams         map[string]interface{} // Extra params for batch operations (e.g., role_arn, output_s3_uri for Bedrock)
	FileExtraParams          map[string]interface{} // Extra params for file operations (e.g., s3_bucket for Bedrock)
	DisableParallelFor       []string               // Test scenarios to disable parallel execution for (e.g., "Transcription" for rate-limited APIs)
	ExpectRawRequestResponse bool                   // When true, validate rawRequest/rawResponse in ExtraFields
	PassthroughModel         string                 // Model for passthrough API tests; defaults to ChatModel when empty
	CompactionModel          string                 // Model for compaction tests; defaults to claude-sonnet-4-6
	InterleavedThinkingModel string                 // Model for interleaved thinking tests; defaults to claude-opus-4-5
	FastModeModel            string                 // Model for fast mode tests; defaults to claude-opus-4-6
	RealtimeModel            string                 // Model for Realtime API (e.g., "gpt-4o-realtime-preview")
}

ComprehensiveTestConfig extends TestConfig with additional scenarios

type ConsistencyResult

type ConsistencyResult struct {
	Provider       schemas.ModelProvider
	Response       string
	SentenceCount  int
	WordCount      int
	ContainsAI     bool
	ContainsFuture bool
}

type ContentValidationCondition

type ContentValidationCondition struct{}

ContentValidationCondition checks if a response fails basic content validation. This is crucial for vision tests, where the AI might give different descriptions.

func (*ContentValidationCondition) GetConditionName

func (c *ContentValidationCondition) GetConditionName() string

func (*ContentValidationCondition) ShouldRetry

type ConversationSettings

type ConversationSettings struct {
	MaxMessages                int
	ConversationGeneratorModel string
	RequiredMessageTypes       []MessageModality
}

ConversationSettings controls conversation generation

type CountTokensRetryCondition

type CountTokensRetryCondition interface {
	ShouldRetry(response *schemas.BifrostCountTokensResponse, err *schemas.BifrostError, context TestRetryContext) (bool, string)
	GetConditionName() string
}

CountTokensRetryCondition defines an interface for checking if a count tokens test operation should be retried

type CountTokensRetryConfig

type CountTokensRetryConfig struct {
	MaxAttempts int                                              // Maximum retry attempts (including initial attempt)
	BaseDelay   time.Duration                                    // Base delay between retries
	MaxDelay    time.Duration                                    // Maximum delay between retries
	Conditions  []CountTokensRetryCondition                      // Conditions that trigger retries
	OnRetry     func(attempt int, reason string, t *testing.T)   // Called before each retry
	OnFinalFail func(attempts int, finalErr error, t *testing.T) // Called on final failure
}

CountTokensRetryConfig configures retry behavior for count tokens test scenarios

type CrossProviderScenario

type CrossProviderScenario struct {
	Name               string
	Description        string
	InitialMessage     string
	ExpectedFlow       []ScenarioStep
	MaxMessages        int
	RequiredModalities []MessageModality
	SuccessCriteria    ScenarioSuccess
}

CrossProviderScenario defines a complete test scenario

func GetPredefinedScenarios

func GetPredefinedScenarios() []CrossProviderScenario

GetPredefinedScenarios returns all available test scenarios

type CrossProviderTestConfig

type CrossProviderTestConfig struct {
	Providers            []ProviderConfig
	ConversationSettings ConversationSettings
	TestSettings         TestSettings
}

CrossProviderTestConfig configures the entire test

type DualAPITestResult

type DualAPITestResult struct {
	ChatCompletionsResponse *schemas.BifrostChatResponse
	ChatCompletionsError    *schemas.BifrostError
	ResponsesAPIResponse    *schemas.BifrostResponsesResponse
	ResponsesAPIError       *schemas.BifrostError
	BothSucceeded           bool
}

DualAPITestResult represents the result of testing both Chat Completions and Responses APIs

func WithDualAPITestRetry

func WithDualAPITestRetry(
	t *testing.T,
	config TestRetryConfig,
	context TestRetryContext,
	expectations ResponseExpectations,
	scenarioName string,
	chatOperation func() (*schemas.BifrostChatResponse, *schemas.BifrostError),
	responsesOperation func() (*schemas.BifrostResponsesResponse, *schemas.BifrostError),
) DualAPITestResult

WithDualAPITestRetry wraps a test operation with retry logic for both the Chat Completions and Responses APIs. The test passes only when BOTH APIs succeed according to expectations.

RETRY STRATEGY: Validation failures ALWAYS trigger retries (primary purpose: functionality checks). Network errors are handled by bifrost core, so retries here focus on content/functionality validation.

type EmbeddingRetryCondition

type EmbeddingRetryCondition interface {
	ShouldRetry(response *schemas.BifrostEmbeddingResponse, err *schemas.BifrostError, context TestRetryContext) (bool, string)
	GetConditionName() string
}

EmbeddingRetryCondition defines an interface for checking if an embedding test operation should be retried

type EmbeddingRetryConfig

type EmbeddingRetryConfig struct {
	MaxAttempts int                                              // Maximum retry attempts (including initial attempt)
	BaseDelay   time.Duration                                    // Base delay between retries
	MaxDelay    time.Duration                                    // Maximum delay between retries
	Conditions  []EmbeddingRetryCondition                        // Conditions that trigger retries
	OnRetry     func(attempt int, reason string, t *testing.T)   // Called before each retry
	OnFinalFail func(attempts int, finalErr error, t *testing.T) // Called on final failure
}

EmbeddingRetryConfig configures retry behavior for embedding test scenarios

type EmptyCountTokensCondition

type EmptyCountTokensCondition struct{}

EmptyCountTokensCondition checks for missing or invalid token counts

func (*EmptyCountTokensCondition) GetConditionName

func (c *EmptyCountTokensCondition) GetConditionName() string

func (*EmptyCountTokensCondition) ShouldRetry

type EmptyEmbeddingCondition

type EmptyEmbeddingCondition struct{}

EmptyEmbeddingCondition checks for missing or empty embeddings

func (*EmptyEmbeddingCondition) GetConditionName

func (c *EmptyEmbeddingCondition) GetConditionName() string

func (*EmptyEmbeddingCondition) ShouldRetry

func (c *EmptyEmbeddingCondition) ShouldRetry(response *schemas.BifrostResponse, err *schemas.BifrostError, context TestRetryContext) (bool, string)

type EmptyImageGenerationCondition

type EmptyImageGenerationCondition struct{}

EmptyImageGenerationCondition checks for missing or invalid image data

func (*EmptyImageGenerationCondition) GetConditionName

func (c *EmptyImageGenerationCondition) GetConditionName() string

func (*EmptyImageGenerationCondition) ShouldRetry

type EmptyResponseCondition

type EmptyResponseCondition struct{}

EmptyResponseCondition checks for empty or missing response content

func (*EmptyResponseCondition) GetConditionName

func (c *EmptyResponseCondition) GetConditionName() string

func (*EmptyResponseCondition) ShouldRetry

func (c *EmptyResponseCondition) ShouldRetry(response *schemas.BifrostResponse, err *schemas.BifrostError, context TestRetryContext) (bool, string)

type EmptySpeechCondition

type EmptySpeechCondition struct{}

EmptySpeechCondition checks for missing or invalid audio data in speech synthesis responses

func (*EmptySpeechCondition) GetConditionName

func (c *EmptySpeechCondition) GetConditionName() string

func (*EmptySpeechCondition) ShouldRetry

func (c *EmptySpeechCondition) ShouldRetry(response *schemas.BifrostResponse, err *schemas.BifrostError, context TestRetryContext) (bool, string)

type EmptyTranscriptionCondition

type EmptyTranscriptionCondition struct{}

EmptyTranscriptionCondition checks for missing or invalid transcription text

func (*EmptyTranscriptionCondition) GetConditionName

func (c *EmptyTranscriptionCondition) GetConditionName() string

func (*EmptyTranscriptionCondition) ShouldRetry

type ErrorCategory

// ErrorCategory describes one class of errors: a name, a description, and a
// color string.
type ErrorCategory struct {
	Name        string // Category name
	Description string // Description of the category
	Color       string // For potential colored output
}

ErrorCategory represents different types of errors

type EvaluationRequest

// EvaluationRequest carries the inputs for OpenAIJudge.EvaluateResponse.
type EvaluationRequest struct {
	ScenarioContext string                // Scenario context supplied with the request
	UserMessage     string                // User message (presumably the one LLMResponse answers — confirm)
	LLMResponse     string                // LLM response text to be evaluated
	Provider        schemas.ModelProvider // Provider associated with the response
	Criteria        StepSuccess           // Step validation criteria
	APIType         string                // "chat" or "responses"
}

EvaluationRequest contains data for evaluation

type EvaluationResult

// EvaluationResult is the result type returned by OpenAIJudge.EvaluateResponse.
// Each field maps to the JSON key named in its struct tag.
// NOTE(review): the scale of Score and the format of the *Check strings are
// not visible here — confirm against the judge implementation.
type EvaluationResult struct {
	Passed            bool     `json:"passed"`
	Score             float64  `json:"score"`
	KeywordCheck      string   `json:"keyword_check"`
	ForbiddenCheck    string   `json:"forbidden_check"`
	ToolCheck         string   `json:"tool_check"`
	QualityAssessment string   `json:"quality_assessment"`
	Suggestions       string   `json:"suggestions"`
	FatalIssues       []string `json:"fatal_issues"`
}

EvaluationResult contains evaluation results

type FileContentRetryCondition

// FileContentRetryCondition is the contract for checks that decide whether a
// file content test operation should be retried.
type FileContentRetryCondition interface {
	// ShouldRetry receives the operation's response, its error, and the retry
	// context, and reports whether to retry. The string result is presumably
	// the reason for the retry — confirm against the retry loop.
	ShouldRetry(response *schemas.BifrostFileContentResponse, err *schemas.BifrostError, context TestRetryContext) (bool, string)
	// GetConditionName returns the name of the condition.
	GetConditionName() string
}

FileContentRetryCondition defines an interface for checking if a file content test operation should be retried

type FileContentRetryConfig

// FileContentRetryConfig holds the retry settings for file content tests: an
// attempt cap, delay bounds, the conditions that trigger a retry, and
// callbacks invoked before each retry and on final failure.
type FileContentRetryConfig struct {
	MaxAttempts int                                              // Maximum retry attempts (including initial attempt)
	BaseDelay   time.Duration                                    // Base delay between retries
	MaxDelay    time.Duration                                    // Maximum delay between retries
	Conditions  []FileContentRetryCondition                      // Conditions that trigger retries
	OnRetry     func(attempt int, reason string, t *testing.T)   // Called before each retry
	OnFinalFail func(attempts int, finalErr error, t *testing.T) // Called on final failure
}

FileContentRetryConfig configures retry behavior for file content test scenarios

type FileDeleteRetryCondition

// FileDeleteRetryCondition is the contract for checks that decide whether a
// file delete test operation should be retried.
type FileDeleteRetryCondition interface {
	// ShouldRetry receives the operation's response, its error, and the retry
	// context, and reports whether to retry. The string result is presumably
	// the reason for the retry — confirm against the retry loop.
	ShouldRetry(response *schemas.BifrostFileDeleteResponse, err *schemas.BifrostError, context TestRetryContext) (bool, string)
	// GetConditionName returns the name of the condition.
	GetConditionName() string
}

FileDeleteRetryCondition defines an interface for checking if a file delete test operation should be retried

type FileDeleteRetryConfig

// FileDeleteRetryConfig holds the retry settings for file delete tests: an
// attempt cap, delay bounds, the conditions that trigger a retry, and
// callbacks invoked before each retry and on final failure.
type FileDeleteRetryConfig struct {
	MaxAttempts int                                              // Maximum retry attempts (including initial attempt)
	BaseDelay   time.Duration                                    // Base delay between retries
	MaxDelay    time.Duration                                    // Maximum delay between retries
	Conditions  []FileDeleteRetryCondition                       // Conditions that trigger retries
	OnRetry     func(attempt int, reason string, t *testing.T)   // Called before each retry
	OnFinalFail func(attempts int, finalErr error, t *testing.T) // Called on final failure
}

FileDeleteRetryConfig configures retry behavior for file delete test scenarios

type FileListRetryCondition

// FileListRetryCondition is the contract for checks that decide whether a
// file list test operation should be retried.
type FileListRetryCondition interface {
	// ShouldRetry receives the operation's response, its error, and the retry
	// context, and reports whether to retry. The string result is presumably
	// the reason for the retry — confirm against the retry loop.
	ShouldRetry(response *schemas.BifrostFileListResponse, err *schemas.BifrostError, context TestRetryContext) (bool, string)
	// GetConditionName returns the name of the condition.
	GetConditionName() string
}

FileListRetryCondition defines an interface for checking if a file list test operation should be retried

type FileListRetryConfig

// FileListRetryConfig holds the retry settings for file list tests: an
// attempt cap, delay bounds, the conditions that trigger a retry, and
// callbacks invoked before each retry and on final failure.
type FileListRetryConfig struct {
	MaxAttempts int                                              // Maximum retry attempts (including initial attempt)
	BaseDelay   time.Duration                                    // Base delay between retries
	MaxDelay    time.Duration                                    // Maximum delay between retries
	Conditions  []FileListRetryCondition                         // Conditions that trigger retries
	OnRetry     func(attempt int, reason string, t *testing.T)   // Called before each retry
	OnFinalFail func(attempts int, finalErr error, t *testing.T) // Called on final failure
}

FileListRetryConfig configures retry behavior for file list test scenarios

type FileNotProcessedCondition

type FileNotProcessedCondition struct{}

FileNotProcessedCondition checks if file/document was not properly processed

func (*FileNotProcessedCondition) GetConditionName

func (c *FileNotProcessedCondition) GetConditionName() string

func (*FileNotProcessedCondition) ShouldRetry

type FileRetrieveRetryCondition

// FileRetrieveRetryCondition is the contract for checks that decide whether a
// file retrieve test operation should be retried.
type FileRetrieveRetryCondition interface {
	// ShouldRetry receives the operation's response, its error, and the retry
	// context, and reports whether to retry. The string result is presumably
	// the reason for the retry — confirm against the retry loop.
	ShouldRetry(response *schemas.BifrostFileRetrieveResponse, err *schemas.BifrostError, context TestRetryContext) (bool, string)
	// GetConditionName returns the name of the condition.
	GetConditionName() string
}

FileRetrieveRetryCondition defines an interface for checking if a file retrieve test operation should be retried

type FileRetrieveRetryConfig

// FileRetrieveRetryConfig holds the retry settings for file retrieve tests: an
// attempt cap, delay bounds, the conditions that trigger a retry, and
// callbacks invoked before each retry and on final failure.
type FileRetrieveRetryConfig struct {
	MaxAttempts int                                              // Maximum retry attempts (including initial attempt)
	BaseDelay   time.Duration                                    // Base delay between retries
	MaxDelay    time.Duration                                    // Maximum delay between retries
	Conditions  []FileRetrieveRetryCondition                     // Conditions that trigger retries
	OnRetry     func(attempt int, reason string, t *testing.T)   // Called before each retry
	OnFinalFail func(attempts int, finalErr error, t *testing.T) // Called on final failure
}

FileRetrieveRetryConfig configures retry behavior for file retrieve test scenarios

type FileUploadRetryCondition

// FileUploadRetryCondition is the contract for checks that decide whether a
// file upload test operation should be retried.
type FileUploadRetryCondition interface {
	// ShouldRetry receives the operation's response, its error, and the retry
	// context, and reports whether to retry. The string result is presumably
	// the reason for the retry — confirm against the retry loop.
	ShouldRetry(response *schemas.BifrostFileUploadResponse, err *schemas.BifrostError, context TestRetryContext) (bool, string)
	// GetConditionName returns the name of the condition.
	GetConditionName() string
}

FileUploadRetryCondition defines an interface for checking if a file upload test operation should be retried

type FileUploadRetryConfig

// FileUploadRetryConfig holds the retry settings for file upload tests: an
// attempt cap, delay bounds, the conditions that trigger a retry, and
// callbacks invoked before each retry and on final failure.
type FileUploadRetryConfig struct {
	MaxAttempts int                                              // Maximum retry attempts (including initial attempt)
	BaseDelay   time.Duration                                    // Base delay between retries
	MaxDelay    time.Duration                                    // Maximum delay between retries
	Conditions  []FileUploadRetryCondition                       // Conditions that trigger retries
	OnRetry     func(attempt int, reason string, t *testing.T)   // Called before each retry
	OnFinalFail func(attempts int, finalErr error, t *testing.T) // Called on final failure
}

FileUploadRetryConfig configures retry behavior for file upload test scenarios

type GeneratedFollowup

// GeneratedFollowup is the result type returned by
// OpenAIConversationDriver.GenerateNextMessage. Each field maps to the JSON
// key named in its struct tag.
// NOTE(review): the expected contents of ModalityContext, ExpectedBehavior and
// TestFocus are not visible here — confirm against the driver's prompt.
type GeneratedFollowup struct {
	UserMessage      string `json:"user_message"`
	ModalityContext  string `json:"modality_context"`
	ExpectedBehavior string `json:"expected_behavior"`
	TestFocus        string `json:"test_focus"`
}

GeneratedFollowup contains the generated followup message

type GenericResponseCondition

type GenericResponseCondition struct{}

GenericResponseCondition checks for generic/template responses

func (*GenericResponseCondition) GetConditionName

func (c *GenericResponseCondition) GetConditionName() string

func (*GenericResponseCondition) ShouldRetry

type ImageGenerationRetryCondition

// ImageGenerationRetryCondition is the contract for checks that decide whether
// an image generation test operation should be retried.
type ImageGenerationRetryCondition interface {
	// ShouldRetry receives the operation's response, its error, and the retry
	// context, and reports whether to retry. The string result is presumably
	// the reason for the retry — confirm against the retry loop.
	ShouldRetry(response *schemas.BifrostImageGenerationResponse, err *schemas.BifrostError, context TestRetryContext) (bool, string)
	// GetConditionName returns the name of the condition.
	GetConditionName() string
}

ImageGenerationRetryCondition defines an interface for checking if an image generation test operation should be retried

type ImageGenerationRetryConfig

// ImageGenerationRetryConfig holds the retry settings for image generation
// tests: an attempt cap, delay bounds, the conditions that trigger a retry,
// and callbacks invoked before each retry and on final failure.
type ImageGenerationRetryConfig struct {
	MaxAttempts int                                              // Maximum retry attempts (including initial attempt)
	BaseDelay   time.Duration                                    // Base delay between retries
	MaxDelay    time.Duration                                    // Maximum delay between retries
	Conditions  []ImageGenerationRetryCondition                  // Conditions that trigger retries
	OnRetry     func(attempt int, reason string, t *testing.T)   // Called before each retry
	OnFinalFail func(attempts int, finalErr error, t *testing.T) // Called on final failure
}

ImageGenerationRetryConfig configures retry behavior for image generation test scenarios

type ImageGenerationStreamValidationResult

// ImageGenerationStreamValidationResult is the value produced by the
// validateStream callback passed to WithImageGenerationStreamRetry, and the
// type that function returns.
type ImageGenerationStreamValidationResult struct {
	Passed       bool     // Whether validation passed
	Errors       []string // Error messages (presumably validation failures — confirm)
	ReceivedData bool     // Presumably true once any stream data was received — confirm
	StreamErrors []string // Error messages attributed to the stream (distinction from Errors not shown here)
	LastLatency  int64    // Last observed latency; units not shown here — confirm
}

func WithImageGenerationStreamRetry

func WithImageGenerationStreamRetry(
	t *testing.T,
	config TestRetryConfig,
	context TestRetryContext,
	operation func() (chan *schemas.BifrostStreamChunk, *schemas.BifrostError),
	validateStream func(chan *schemas.BifrostStreamChunk) ImageGenerationStreamValidationResult) ImageGenerationStreamValidationResult

WithImageGenerationStreamRetry wraps an image generation streaming operation with retry logic that includes stream content validation. This function wraps the entire operation (request + stream reading + validation) and retries on validation failures.

type ImageNotProcessedCondition

type ImageNotProcessedCondition struct{}

ImageNotProcessedCondition checks if image content was actually processed

func (*ImageNotProcessedCondition) GetConditionName

func (c *ImageNotProcessedCondition) GetConditionName() string

func (*ImageNotProcessedCondition) ShouldRetry

type IncompleteStreamCondition

type IncompleteStreamCondition struct{}

IncompleteStreamCondition checks for incomplete streaming responses

func (*IncompleteStreamCondition) GetConditionName

func (c *IncompleteStreamCondition) GetConditionName() string

func (*IncompleteStreamCondition) ShouldRetry

type InvalidCountTokensCondition

type InvalidCountTokensCondition struct{}

InvalidCountTokensCondition checks for invalid token count data

func (*InvalidCountTokensCondition) GetConditionName

func (c *InvalidCountTokensCondition) GetConditionName() string

func (*InvalidCountTokensCondition) ShouldRetry

type InvalidEmbeddingDimensionCondition

// InvalidEmbeddingDimensionCondition is a retry condition (it provides
// GetConditionName and ShouldRetry) configured with the vector dimension that
// embeddings are expected to have.
type InvalidEmbeddingDimensionCondition struct {
	ExpectedDimension int // Expected vector dimension (0 means any)
}

InvalidEmbeddingDimensionCondition checks for inconsistent embedding dimensions

func (*InvalidEmbeddingDimensionCondition) GetConditionName

func (c *InvalidEmbeddingDimensionCondition) GetConditionName() string

func (*InvalidEmbeddingDimensionCondition) ShouldRetry

type ListModelsRetryCondition

// ListModelsRetryCondition is the contract for checks that decide whether a
// list models test operation should be retried.
type ListModelsRetryCondition interface {
	// ShouldRetry receives the operation's response, its error, and the retry
	// context, and reports whether to retry. The string result is presumably
	// the reason for the retry — confirm against the retry loop.
	ShouldRetry(response *schemas.BifrostListModelsResponse, err *schemas.BifrostError, context TestRetryContext) (bool, string)
	// GetConditionName returns the name of the condition.
	GetConditionName() string
}

ListModelsRetryCondition defines an interface for checking if a list models test operation should be retried

type ListModelsRetryConfig

// ListModelsRetryConfig holds the retry settings for list models tests: an
// attempt cap, delay bounds, the conditions that trigger a retry, and
// callbacks invoked before each retry and on final failure.
type ListModelsRetryConfig struct {
	MaxAttempts int                                              // Maximum retry attempts (including initial attempt)
	BaseDelay   time.Duration                                    // Base delay between retries
	MaxDelay    time.Duration                                    // Maximum delay between retries
	Conditions  []ListModelsRetryCondition                       // Conditions that trigger retries
	OnRetry     func(attempt int, reason string, t *testing.T)   // Called before each retry
	OnFinalFail func(attempts int, finalErr error, t *testing.T) // Called on final failure
}

ListModelsRetryConfig configures retry behavior for list models test scenarios

type MalformedToolArgsCondition

type MalformedToolArgsCondition struct{}

MalformedToolArgsCondition checks for malformed tool call arguments

func (*MalformedToolArgsCondition) GetConditionName

func (c *MalformedToolArgsCondition) GetConditionName() string

func (*MalformedToolArgsCondition) ShouldRetry

type MessageModality

type MessageModality string

MessageModality defines the type of interaction required

// MessageModality values. A MessageModality is taken by
// ProviderRoundRobin.GetNextProviderForModality and stored in
// ScenarioStep.RequiredModality and ScenarioSuccess.RequiredModalities.
const (
	ModalityText      MessageModality = "text"
	ModalityTool      MessageModality = "tool"
	ModalityVision    MessageModality = "vision"
	ModalityReasoning MessageModality = "reasoning"
)

type MissingToolCallCondition

// MissingToolCallCondition is a retry condition (it provides GetConditionName
// and ShouldRetry) configured with the name of the tool that is expected to
// be called.
type MissingToolCallCondition struct {
	ExpectedToolName string // Name of the tool that should have been called
}

MissingToolCallCondition checks if expected tool call is missing

func (*MissingToolCallCondition) GetConditionName

func (c *MissingToolCallCondition) GetConditionName() string

func (*MissingToolCallCondition) ShouldRetry

type NextMessageRequest

// NextMessageRequest carries the inputs for
// OpenAIConversationDriver.GenerateNextMessage.
type NextMessageRequest struct {
	Scenario            CrossProviderScenario // Scenario the conversation belongs to
	ConversationHistory []schemas.ChatMessage // Conversation history as chat messages
	CurrentStepNumber   int                   // Number of the current step
	NextStep            ScenarioStep          // Definition of the next step
	PreviousEvaluation  *EvaluationResult     // Previous evaluation result; a pointer, so presumably nil when there is none — confirm
	APIType             string                // "chat" or "responses"
}

NextMessageRequest contains data for generating next message

type OpenAIConversationDriver

type OpenAIConversationDriver struct {
	// contains filtered or unexported fields
}

OpenAIConversationDriver generates followup messages

func NewOpenAIConversationDriver

func NewOpenAIConversationDriver(client *bifrost.Bifrost, driverModel string, t *testing.T) *OpenAIConversationDriver

NewOpenAIConversationDriver creates a new conversation driver

func (*OpenAIConversationDriver) GenerateNextMessage

func (driver *OpenAIConversationDriver) GenerateNextMessage(ctx *schemas.BifrostContext, request NextMessageRequest) (*GeneratedFollowup, error)

GenerateNextMessage creates a natural followup message

type OpenAIJudge

type OpenAIJudge struct {
	// contains filtered or unexported fields
}

OpenAIJudge evaluates responses using OpenAI

func NewOpenAIJudge

func NewOpenAIJudge(client *bifrost.Bifrost, judgeModel string, t *testing.T) *OpenAIJudge

NewOpenAIJudge creates a new judge instance

func (*OpenAIJudge) EvaluateResponse

func (judge *OpenAIJudge) EvaluateResponse(ctx *schemas.BifrostContext, evaluation EvaluationRequest) (*EvaluationResult, error)

EvaluateResponse judges an LLM response

type OpusReasoningTestConfig

// OpusReasoningTestConfig is the element type of the slice returned by
// GetOpusReasoningTestConfigs. It pairs a provider with its Opus 4.5 / 4.6
// model identifiers, a fallback list, and per-model skip switches.
type OpusReasoningTestConfig struct {
	Provider    schemas.ModelProvider
	Opus45Model string // Opus 4.5 model identifier
	Opus46Model string // Opus 4.6 model identifier
	Fallbacks   []schemas.Fallback
	SkipOpus45  bool   // Skip Opus 4.5 tests
	SkipOpus46  bool   // Skip Opus 4.6 tests
	SkipReason  string // Reason for skipping
}

OpusReasoningTestConfig holds configuration for Opus-specific reasoning tests

func GetOpusReasoningTestConfigs

func GetOpusReasoningTestConfigs() []OpusReasoningTestConfig

GetOpusReasoningTestConfigs returns test configurations for Opus reasoning across providers

type ParsedError

// ParsedError is the result type returned by ParseBifrostError: a category
// and title, the main message, and optional detail, suggestion and technical
// data.
type ParsedError struct {
	Category    string                 // Error category (HTTP, Auth, RateLimit, etc.)
	Title       string                 // Short, readable title
	Message     string                 // Main error message
	Details     []string               // Additional details
	Suggestions []string               // Potential solutions
	Technical   map[string]interface{} // Technical details for debugging
}

ParsedError represents a cleaned-up, human-readable error

func ParseBifrostError

func ParseBifrostError(err *schemas.BifrostError) ParsedError

ParseBifrostError converts a BifrostError into a human-readable ParsedError

type PartialToolCallCondition

// PartialToolCallCondition is a retry condition (it provides GetConditionName
// and ShouldRetry) configured with the number of tool calls that is expected.
type PartialToolCallCondition struct {
	ExpectedCount int // Expected number of tool calls
}

PartialToolCallCondition checks if we got fewer tool calls than expected

func (*PartialToolCallCondition) GetConditionName

func (c *PartialToolCallCondition) GetConditionName() string

func (*PartialToolCallCondition) ShouldRetry

type ProviderConfig

// ProviderConfig describes one provider's models and capability flags. A
// slice of these is passed to NewProviderRoundRobin, and
// ProviderRoundRobin.GetNextProviderForModality returns one.
type ProviderConfig struct {
	Provider        schemas.ModelProvider // Provider this entry describes
	ChatModel       string                // Model name for chat (presumably used for non-vision requests — confirm)
	VisionModel     string                // Model name for vision
	ToolsSupported  bool                  // Whether tools are supported
	VisionSupported bool                  // Whether vision is supported
	StreamSupported bool                  // Whether streaming is supported
	Available       bool                  // Whether the provider is available (how this is determined is not shown here)
}

ProviderConfig defines a provider's capabilities

type ProviderRoundRobin

type ProviderRoundRobin struct {
	// contains filtered or unexported fields
}

ProviderRoundRobin manages provider selection and tracking

func NewProviderRoundRobin

func NewProviderRoundRobin(providers []ProviderConfig, t *testing.T) *ProviderRoundRobin

NewProviderRoundRobin creates a new round-robin manager

func (*ProviderRoundRobin) GetNextProviderForModality

func (prr *ProviderRoundRobin) GetNextProviderForModality(modality MessageModality) (ProviderConfig, error)

GetNextProviderForModality returns the next provider that supports the required modality

func (*ProviderRoundRobin) GetUsageStats

func (prr *ProviderRoundRobin) GetUsageStats() map[schemas.ModelProvider]int

type ResponseExpectations

// ResponseExpectations lists the checks a response is expected to satisfy,
// grouped into structure, content, tool-calling, technical, raw
// request/response, and provider-specific expectations. Values are built by
// the *Expectations constructor functions and can be merged with
// CombineExpectations.
type ResponseExpectations struct {
	// Basic structure expectations
	ShouldHaveContent    bool    // Response should have non-empty content
	ExpectedChoiceCount  int     // Expected number of choices (0 = any)
	ExpectedFinishReason *string // Expected finish reason

	// Content expectations
	ShouldContainKeywords []string       // Content should contain ALL these keywords (AND logic)
	ShouldContainAnyOf    []string       // Content should contain AT LEAST ONE of these keywords (OR logic)
	ShouldNotContainWords []string       // Content should NOT contain these words
	ContentPattern        *regexp.Regexp // Content should match this pattern
	IsRelevantToPrompt    bool           // Content should be relevant to the original prompt

	// Tool calling expectations
	ExpectedToolCalls          []ToolCallExpectation // Expected tool calls
	ShouldNotHaveFunctionCalls bool                  // Should not have any function calls

	// Technical expectations
	ShouldHaveUsageStats bool // Should have token usage information
	ShouldHaveTimestamps bool // Should have created timestamp
	ShouldHaveModel      bool // Should have model field
	ShouldHaveLatency    bool // Should have latency information in ExtraFields

	// Raw request/response expectations
	ShouldHaveRawRequest  bool // Should have non-nil, compact JSON rawRequest in ExtraFields
	ShouldHaveRawResponse bool // Should have non-nil, compact JSON rawResponse in ExtraFields

	// Provider-specific expectations
	ProviderSpecific map[string]interface{} // Provider-specific validation data
}

ResponseExpectations defines what we expect from a response

func ApplyRawExpectations

func ApplyRawExpectations(expectations ResponseExpectations, testConfig ComprehensiveTestConfig, isStreaming bool, options ...bool) ResponseExpectations

ApplyRawExpectations applies raw request/response expectations based on test config. Call this after creating expectations directly (SpeechExpectations, TranscriptionExpectations, etc.) when not using GetExpectationsForScenario. Parameters:

  • isStreaming: if true, skips RawResponse expectation (streaming has no single response body)
  • options: optional positional bool flags:
      ◦ options[0] = isMultipartRequest: if true, skips RawRequest expectation (multipart form data can't return raw JSON request)
      ◦ options[1] = isBinaryResponse: if true, skips RawResponse expectation (binary responses like audio don't have JSON raw response)

func BasicChatExpectations

func BasicChatExpectations() ResponseExpectations

BasicChatExpectations returns validation expectations for basic chat scenarios

func CalculatorToolExpectations

func CalculatorToolExpectations() ResponseExpectations

CalculatorToolExpectations returns validation expectations for calculator tool calls

func ChatAudioExpectations

func ChatAudioExpectations() ResponseExpectations

ChatAudioExpectations returns validation expectations for chat audio scenarios

func CombineExpectations

func CombineExpectations(expectations ...ResponseExpectations) ResponseExpectations

CombineExpectations merges multiple expectations (later ones override earlier ones)

func ConsistencyExpectations

func ConsistencyExpectations(expectedConsistencyMarkers []string) ResponseExpectations

ConsistencyExpectations returns expectations for consistency tests

func ConversationExpectations

func ConversationExpectations(contextKeywords []string) ResponseExpectations

ConversationExpectations returns validation expectations for multi-turn conversation scenarios

func CountTokensExpectations

func CountTokensExpectations() ResponseExpectations

CountTokensExpectations returns validation expectations for count tokens scenarios

func EmbeddingExpectations

func EmbeddingExpectations(expectedTexts []string) ResponseExpectations

EmbeddingExpectations returns validation expectations for embedding scenarios

func FileInputExpectations

func FileInputExpectations() ResponseExpectations

FileInputExpectations returns validation expectations for file input scenarios

func GetExpectationsForScenario

func GetExpectationsForScenario(scenarioName string, testConfig ComprehensiveTestConfig, customParams map[string]interface{}) ResponseExpectations

GetExpectationsForScenario returns appropriate validation expectations for a given scenario

func ImageAnalysisExpectations

func ImageAnalysisExpectations() ResponseExpectations

ImageAnalysisExpectations returns validation expectations for image analysis scenarios

func ImageGenerationExpectations

func ImageGenerationExpectations(minImages int, expectedSize string) ResponseExpectations

func ModifyExpectationsForProvider

func ModifyExpectationsForProvider(expectations ResponseExpectations, provider schemas.ModelProvider) ResponseExpectations

ModifyExpectationsForProvider adjusts expectations based on provider capabilities. Each provider is explicitly configured for: usage stats, timestamps, model, and latency. If a provider is not listed, defaults are kept (all true from BasicChatExpectations).

func MultipleToolExpectations

func MultipleToolExpectations(tools []string, requiredArgsPerTool [][]string) ResponseExpectations

MultipleToolExpectations returns validation expectations for multiple tool calls

func ReasoningExpectations

func ReasoningExpectations() ResponseExpectations

ReasoningExpectations returns validation expectations for reasoning scenarios

func SemanticCoherenceExpectations

func SemanticCoherenceExpectations(inputPrompt string, expectedTopics []string) ResponseExpectations

SemanticCoherenceExpectations returns expectations for semantic coherence tests

func SpeechExpectations

func SpeechExpectations(minAudioBytes int) ResponseExpectations

SpeechExpectations returns validation expectations for speech synthesis scenarios

func StreamingExpectations

func StreamingExpectations() ResponseExpectations

StreamingExpectations returns validation expectations for streaming scenarios

func TextCompletionExpectations

func TextCompletionExpectations() ResponseExpectations

TextCompletionExpectations returns validation expectations for text completion scenarios

func TimeToolExpectations

func TimeToolExpectations() ResponseExpectations

TimeToolExpectations returns validation expectations for time tool calls

func ToolCallExpectations

func ToolCallExpectations(toolName string, requiredArgs []string) ResponseExpectations

ToolCallExpectations returns validation expectations for tool calling scenarios

func TranscriptionExpectations

func TranscriptionExpectations(minTextLength int) ResponseExpectations

TranscriptionExpectations returns validation expectations for transcription scenarios

func VisionExpectations

func VisionExpectations(expectedKeywords []string) ResponseExpectations

VisionExpectations returns validation expectations for vision/image processing scenarios

func WeatherToolExpectations

func WeatherToolExpectations() ResponseExpectations

WeatherToolExpectations returns validation expectations for weather tool calls

func WebSearchExpectations

func WebSearchExpectations() ResponseExpectations

WebSearchExpectations returns validation expectations for web search responses

type ResponsesContentValidationCondition

type ResponsesContentValidationCondition struct{}

ResponsesContentValidationCondition checks if response fails basic content validation for Responses API

func (*ResponsesContentValidationCondition) GetConditionName

func (c *ResponsesContentValidationCondition) GetConditionName() string

func (*ResponsesContentValidationCondition) ShouldRetry

type ResponsesEmptyCondition

type ResponsesEmptyCondition struct{}

ResponsesEmptyCondition checks for empty Responses API responses

func (*ResponsesEmptyCondition) GetConditionName

func (c *ResponsesEmptyCondition) GetConditionName() string

func (*ResponsesEmptyCondition) ShouldRetry

type ResponsesFileNotProcessedCondition

type ResponsesFileNotProcessedCondition struct{}

ResponsesFileNotProcessedCondition checks if file/document was not properly processed in Responses API

func (*ResponsesFileNotProcessedCondition) GetConditionName

func (c *ResponsesFileNotProcessedCondition) GetConditionName() string

func (*ResponsesFileNotProcessedCondition) ShouldRetry

type ResponsesGenericResponseCondition

type ResponsesGenericResponseCondition struct{}

ResponsesGenericResponseCondition checks for generic/template responses in Responses API

func (*ResponsesGenericResponseCondition) GetConditionName

func (c *ResponsesGenericResponseCondition) GetConditionName() string

func (*ResponsesGenericResponseCondition) ShouldRetry

type ResponsesRetryCondition

// ResponsesRetryCondition is the contract for checks that decide whether a
// Responses API test operation should be retried.
type ResponsesRetryCondition interface {
	// ShouldRetry receives the operation's response, its error, and the retry
	// context, and reports whether to retry. The string result is presumably
	// the reason for the retry — confirm against the retry loop.
	ShouldRetry(response *schemas.BifrostResponsesResponse, err *schemas.BifrostError, context TestRetryContext) (bool, string)
	// GetConditionName returns the name of the condition.
	GetConditionName() string
}

ResponsesRetryCondition defines an interface for checking if a Responses API test operation should be retried

type ResponsesRetryConfig

// ResponsesRetryConfig holds the retry settings for Responses API tests: an
// attempt cap, delay bounds, the conditions that trigger a retry, and
// callbacks invoked before each retry and on final failure. It is the type
// returned by FileInputResponsesRetryConfig and WebSearchRetryConfig.
type ResponsesRetryConfig struct {
	MaxAttempts int                                              // Maximum retry attempts (including initial attempt)
	BaseDelay   time.Duration                                    // Base delay between retries
	MaxDelay    time.Duration                                    // Maximum delay between retries
	Conditions  []ResponsesRetryCondition                        // Conditions that trigger retries
	OnRetry     func(attempt int, reason string, t *testing.T)   // Called before each retry
	OnFinalFail func(attempts int, finalErr error, t *testing.T) // Called on final failure
}

ResponsesRetryConfig configures retry behavior for Responses API test scenarios

func FileInputResponsesRetryConfig

func FileInputResponsesRetryConfig() ResponsesRetryConfig

FileInputResponsesRetryConfig creates a retry config for file/document input tests using Responses API

func WebSearchRetryConfig

func WebSearchRetryConfig() ResponsesRetryConfig

WebSearchRetryConfig returns specialized retry configuration for web search tests

type ResponsesStreamValidationResult

// ResponsesStreamValidationResult is the value produced by the validateStream
// callback passed to WithResponsesStreamValidationRetry, and the type that
// function returns.
type ResponsesStreamValidationResult struct {
	Passed       bool     // Whether validation passed
	Errors       []string // Error messages (presumably validation failures — confirm)
	ReceivedData bool     // Presumably true once any stream data was received — confirm
	StreamErrors []string // Error messages attributed to the stream (distinction from Errors not shown here)
	LastLatency  int64    // Last observed latency; units not shown here — confirm
}

ResponsesStreamValidationResult represents the result of responses streaming validation

func WithResponsesStreamValidationRetry

func WithResponsesStreamValidationRetry(
	t *testing.T,
	config TestRetryConfig,
	context TestRetryContext,
	operation func() (chan *schemas.BifrostStreamChunk, *schemas.BifrostError),
	validateStream func(chan *schemas.BifrostStreamChunk) ResponsesStreamValidationResult,
) ResponsesStreamValidationResult

WithResponsesStreamValidationRetry wraps a responses streaming operation with retry logic that includes stream content validation. This function wraps the entire operation (request + stream reading + validation) and retries on validation failures.

type ResponsesToolCallInfo

// ResponsesToolCallInfo holds the pieces of one tool call gathered from
// Responses API streaming. It is the value type of
// StreamingToolCallAccumulator.ResponsesToolCalls.
type ResponsesToolCallInfo struct {
	ID        string // Identifier of the tool call
	Name      string // Name recorded for the call (presumably the tool/function name — confirm)
	Arguments string // Arguments text for the call (format not shown here — confirm)
}

ResponsesToolCallInfo accumulates tool call information from Responses API streaming

type SampleToolType

// SampleToolType is the string-based type of the sample tool identifiers
// declared below.
type SampleToolType string

// SampleToolType values.
// NOTE(review): the "ping_empty" and "ping_nil" variants presumably differ in
// how the tool's parameters are declared (empty vs. nil) — confirm against
// the tool definitions.
const (
	SampleToolTypeWeather       SampleToolType = "weather"
	SampleToolTypeCalculate     SampleToolType = "calculate"
	SampleToolTypeTime          SampleToolType = "time"
	SampleToolTypePingWithEmpty SampleToolType = "ping_empty"
	SampleToolTypePingWithNil   SampleToolType = "ping_nil"
)

type ScenarioStep

// ScenarioStep describes a single step of a scenario: its number, the action
// expected, the modality it requires, and its success criteria.
type ScenarioStep struct {
	StepNumber       int             // Number of the step (numbering base not shown here)
	ExpectedAction   string          // Description of the expected action
	RequiredModality MessageModality // Type of interaction the step requires
	SuccessCriteria  StepSuccess     // Validation criteria for the step
}

ScenarioStep defines a single step in the scenario

type ScenarioSuccess

// ScenarioSuccess holds the criteria for judging a whole scenario, as opposed
// to the per-step criteria in StepSuccess.
type ScenarioSuccess struct {
	MinStepsCompleted   int               // Minimum number of completed steps
	RequiredModalities  []MessageModality // Modalities that are required (presumably each must be exercised — confirm)
	OverallQualityScore float64           // Overall quality score (presumably a minimum; scale not shown — confirm)
	MustCompleteGoal    bool              // Whether the goal must be completed
}

ScenarioSuccess defines overall scenario success criteria

type SpeechRetryCondition

// SpeechRetryCondition is the contract for checks that decide whether a
// speech test operation should be retried.
type SpeechRetryCondition interface {
	// ShouldRetry receives the operation's response, its error, and the retry
	// context, and reports whether to retry. The string result is presumably
	// the reason for the retry — confirm against the retry loop.
	ShouldRetry(response *schemas.BifrostSpeechResponse, err *schemas.BifrostError, context TestRetryContext) (bool, string)
	// GetConditionName returns the name of the condition.
	GetConditionName() string
}

SpeechRetryCondition defines an interface for checking if a speech test operation should be retried

type SpeechRetryConfig

// SpeechRetryConfig holds the retry settings for speech tests: an attempt
// cap, delay bounds, the conditions that trigger a retry, and callbacks
// invoked before each retry and on final failure.
type SpeechRetryConfig struct {
	MaxAttempts int                                              // Maximum retry attempts (including initial attempt)
	BaseDelay   time.Duration                                    // Base delay between retries
	MaxDelay    time.Duration                                    // Maximum delay between retries
	Conditions  []SpeechRetryCondition                           // Conditions that trigger retries
	OnRetry     func(attempt int, reason string, t *testing.T)   // Called before each retry
	OnFinalFail func(attempts int, finalErr error, t *testing.T) // Called on final failure
}

SpeechRetryConfig configures retry behavior for speech test scenarios

type SpeechStreamValidationResult

// SpeechStreamValidationResult is the value produced by the validateStream
// callback passed to WithSpeechStreamValidationRetry, and the type that
// function returns.
type SpeechStreamValidationResult struct {
	Passed       bool     // Whether validation passed
	Errors       []string // Error messages (presumably validation failures — confirm)
	ReceivedData bool     // Presumably true once any stream data was received — confirm
	StreamErrors []string // Error messages attributed to the stream (distinction from Errors not shown here)
	LastLatency  int64    // Last observed latency; units not shown here — confirm
}

SpeechStreamValidationResult represents the result of speech streaming validation

func WithSpeechStreamValidationRetry

func WithSpeechStreamValidationRetry(
	t *testing.T,
	config TestRetryConfig,
	context TestRetryContext,
	operation func() (chan *schemas.BifrostStreamChunk, *schemas.BifrostError),
	validateStream func(chan *schemas.BifrostStreamChunk) SpeechStreamValidationResult,
) SpeechStreamValidationResult

WithSpeechStreamValidationRetry wraps a speech streaming operation with retry logic that includes stream content validation. This function wraps the entire operation (request + stream reading + validation) and retries on validation failures.

type StepSuccess

// StepSuccess holds the validation criteria for one scenario step. It is used
// as ScenarioStep.SuccessCriteria and EvaluationRequest.Criteria.
type StepSuccess struct {
	MustContainKeywords    []string // Keywords that must be present
	MustNotContainWords    []string // Words that must not be present
	ExpectedToolCalls      []string // Expected tool calls (presumably tool names — confirm)
	RequiresDataExtraction bool     // Whether the step requires data extraction
	QualityThreshold       float64  // Quality threshold (scale not shown here — confirm)
}

StepSuccess defines validation criteria for a step

type StreamErrorCondition

type StreamErrorCondition struct{}

StreamErrorCondition checks for streaming-specific errors that should trigger retries

func (*StreamErrorCondition) GetConditionName

func (c *StreamErrorCondition) GetConditionName() string

func (*StreamErrorCondition) ShouldRetry

func (c *StreamErrorCondition) ShouldRetry(response *schemas.BifrostResponse, err *schemas.BifrostError, context TestRetryContext) (bool, string)

type StreamingToolCallAccumulator

// StreamingToolCallAccumulator accumulates tool call fragments from streaming responses.
// Create one with NewStreamingToolCallAccumulator, feed fragments through
// AccumulateChatToolCall or AccumulateResponsesToolCall, and read the results
// with GetFinalChatToolCalls or GetFinalResponsesToolCalls.
type StreamingToolCallAccumulator struct {
	// For Chat Completions: map of tool call index -> accumulated tool call
	ChatToolCalls map[int]*schemas.ChatAssistantMessageToolCall
	// For Responses API: map of call ID or item ID -> accumulated tool call info
	ResponsesToolCalls map[string]*ResponsesToolCallInfo
	// Map itemID to the key used in ResponsesToolCalls for quick lookup
	ItemIDToKey map[string]string
}

StreamingToolCallAccumulator accumulates tool call fragments from streaming responses

func NewStreamingToolCallAccumulator

func NewStreamingToolCallAccumulator() *StreamingToolCallAccumulator

NewStreamingToolCallAccumulator creates a new accumulator

func (*StreamingToolCallAccumulator) AccumulateChatToolCall

func (acc *StreamingToolCallAccumulator) AccumulateChatToolCall(choiceIndex int, toolCall schemas.ChatAssistantMessageToolCall)

AccumulateChatToolCall accumulates a tool call from a Chat Completions streaming chunk

func (*StreamingToolCallAccumulator) AccumulateResponsesToolCall

func (acc *StreamingToolCallAccumulator) AccumulateResponsesToolCall(callID *string, name *string, arguments *string, itemID *string)

AccumulateResponsesToolCall accumulates a tool call from a Responses API streaming chunk

func (*StreamingToolCallAccumulator) GetFinalChatToolCalls

func (acc *StreamingToolCallAccumulator) GetFinalChatToolCalls() []ToolCallInfo

GetFinalChatToolCalls returns the final accumulated tool calls for Chat Completions

func (*StreamingToolCallAccumulator) GetFinalResponsesToolCalls

func (acc *StreamingToolCallAccumulator) GetFinalResponsesToolCalls() []ToolCallInfo

GetFinalResponsesToolCalls returns the final accumulated tool calls for Responses API

type StreamingValidationResult

// StreamingValidationResult represents the result of streaming validation.
type StreamingValidationResult struct {
	// Whether the validation passed.
	Passed bool
	// Validation error messages.
	Errors []string
}

StreamingValidationResult represents the result of streaming validation

type TestRetryCondition

// TestRetryCondition defines an interface for checking if a test operation
// should be retried. This focuses specifically on LLM behavior inconsistencies,
// not HTTP errors (handled by Bifrost core).
type TestRetryCondition interface {
	// ShouldRetry inspects the response, the error and the retry context.
	// presumably the bool is the retry decision and the string is the reason
	// (compare the `reason string` argument of OnRetry) — confirm.
	ShouldRetry(response *schemas.BifrostResponse, err *schemas.BifrostError, context TestRetryContext) (bool, string)
	// GetConditionName returns the name of the condition.
	GetConditionName() string
}

TestRetryCondition defines an interface for checking if a test operation should be retried. This focuses specifically on LLM behavior inconsistencies, not HTTP errors (handled by Bifrost core).

type TestRetryConfig

// TestRetryConfig configures retry behavior for test scenarios
// (DEPRECATED: Use specific retry configs).
// NOTE(review): the constructors listed under this type (DefaultTestRetryConfig,
// StreamingRetryConfig, ToolCallRetryConfig, ...) still return TestRetryConfig.
type TestRetryConfig struct {
	MaxAttempts int                                              // Maximum retry attempts (including initial attempt)
	BaseDelay   time.Duration                                    // Base delay between retries
	MaxDelay    time.Duration                                    // Maximum delay between retries
	Conditions  []TestRetryCondition                             // Conditions that trigger retries
	OnRetry     func(attempt int, reason string, t *testing.T)   // Called before each retry
	OnFinalFail func(attempts int, finalErr error, t *testing.T) // Called on final failure
}

TestRetryConfig configures retry behavior for test scenarios (DEPRECATED: Use specific retry configs)

func ConversationRetryConfig

func ConversationRetryConfig() TestRetryConfig

ConversationRetryConfig creates a retry config for conversation-based tests

func DefaultCountTokensRetryConfig

func DefaultCountTokensRetryConfig() TestRetryConfig

DefaultCountTokensRetryConfig creates a retry config for count tokens tests

func DefaultEmbeddingRetryConfig

func DefaultEmbeddingRetryConfig() TestRetryConfig

DefaultEmbeddingRetryConfig creates a retry config for embedding tests

func DefaultImageGenerationRetryConfig

func DefaultImageGenerationRetryConfig() TestRetryConfig

DefaultImageGenerationRetryConfig creates a retry config for image generation tests

func DefaultListModelsRetryConfig

func DefaultListModelsRetryConfig() TestRetryConfig

DefaultListModelsRetryConfig creates a retry config for list models tests. IMPORTANT: List models should ALWAYS retry on any failure (errors, nil response, empty data, validation failures).

func DefaultSpeechRetryConfig

func DefaultSpeechRetryConfig() TestRetryConfig

DefaultSpeechRetryConfig creates a retry config for speech synthesis tests

func DefaultTestRetryConfig

func DefaultTestRetryConfig() TestRetryConfig

DefaultTestRetryConfig returns a sensible default retry configuration for LLM tests

func DefaultTranscriptionRetryConfig

func DefaultTranscriptionRetryConfig() TestRetryConfig

DefaultTranscriptionRetryConfig creates a retry config for transcription tests

func FileInputRetryConfig

func FileInputRetryConfig() TestRetryConfig

FileInputRetryConfig creates a retry config for file/document input tests

func GetTestRetryConfigForScenario

func GetTestRetryConfigForScenario(scenarioName string, testConfig ComprehensiveTestConfig) TestRetryConfig

GetTestRetryConfigForScenario returns an appropriate retry config for a scenario

func ImageProcessingRetryConfig

func ImageProcessingRetryConfig() TestRetryConfig

ImageProcessingRetryConfig creates a retry config for image processing tests

func MultiToolRetryConfig

func MultiToolRetryConfig(expectedToolCount int, expectedTools []string) TestRetryConfig

MultiToolRetryConfig creates a retry config for multiple tool call tests

func ReasoningRetryConfig

func ReasoningRetryConfig() TestRetryConfig

ReasoningRetryConfig creates a retry config for reasoning tests

func SpeechStreamRetryConfig

func SpeechStreamRetryConfig() TestRetryConfig

SpeechStreamRetryConfig creates a retry config for streaming speech synthesis tests

func StreamingRetryConfig

func StreamingRetryConfig() TestRetryConfig

StreamingRetryConfig creates a retry config for streaming tests

func ToolCallRetryConfig

func ToolCallRetryConfig(expectedToolName string) TestRetryConfig

ToolCallRetryConfig creates a retry config optimized for tool calling tests

type TestRetryContext

// TestRetryContext provides context information for retry decisions.
// It is the context argument passed to the ShouldRetry method of the retry
// condition interfaces.
type TestRetryContext struct {
	ScenarioName     string                 // Name of the test scenario
	AttemptNumber    int                    // Current attempt number (1-based)
	ExpectedBehavior map[string]interface{} // What we expected to happen
	TestMetadata     map[string]interface{} // Additional context for retry decisions
}

TestRetryContext provides context information for retry decisions

type TestScenarioFunc

// TestScenarioFunc defines the function signature for test scenario functions.
// A scenario receives the *testing.T, a *bifrost.Bifrost instance, a
// context.Context and the ComprehensiveTestConfig; it returns nothing.
type TestScenarioFunc func(*testing.T, *bifrost.Bifrost, context.Context, ComprehensiveTestConfig)

TestScenarioFunc defines the function signature for test scenario functions

type TestScenarios

// TestScenarios defines the comprehensive test scenarios.
// Every field is a boolean flag named after one scenario.
// NOTE(review): presumably true means the scenario is enabled/supported for the
// provider under test — confirm against the code that reads these flags.
type TestScenarios struct {
	TextCompletion               bool // Text completion functionality
	TextCompletionStream         bool // Streaming text completion functionality
	SimpleChat                   bool // Simple chat functionality
	CompletionStream             bool // Streaming completion functionality (presumably chat completions — confirm)
	MultiTurnConversation        bool // Multi-turn conversation functionality
	ToolCalls                    bool // Tool calls functionality
	ToolCallsStreaming           bool // Streaming tool calls functionality
	MultipleToolCalls            bool // Multiple tool calls functionality
	MultipleToolCallsStreaming   bool // Streaming multiple tool calls (some providers only return 1 tool call in streaming)
	End2EndToolCalling           bool // End-to-end tool calling functionality
	AutomaticFunctionCall        bool // Automatic function call functionality
	ImageURL                     bool // Image URL functionality
	ImageBase64                  bool // Base64 image functionality
	MultipleImages               bool // Multiple images functionality
	FileBase64                   bool // Base64 file functionality
	FileURL                      bool // File URL functionality
	CompleteEnd2End              bool // Complete end-to-end functionality
	SpeechSynthesis              bool // Text-to-speech functionality
	SpeechSynthesisStream        bool // Streaming text-to-speech functionality
	Transcription                bool // Speech-to-text functionality
	TranscriptionStream          bool // Streaming speech-to-text functionality
	Embedding                    bool // Embedding functionality
	Reasoning                    bool // Reasoning/thinking functionality via Responses API
	PromptCaching                bool // Prompt caching functionality
	ListModels                   bool // List available models functionality
	ImageGeneration              bool // Image generation functionality
	ImageGenerationStream        bool // Streaming image generation functionality
	ImageEdit                    bool // Image edit functionality
	ImageEditStream              bool // Streaming image edit functionality
	ImageVariation               bool // Image variation functionality
	ImageVariationStream         bool // Streaming image variation functionality (if supported)
	VideoGeneration              bool // Video generation functionality
	VideoRetrieve                bool // Video retrieve functionality
	VideoRemix                   bool // Video remix functionality (OpenAI only)
	VideoDownload                bool // Video download functionality
	VideoList                    bool // Video list functionality
	VideoDelete                  bool // Video delete functionality
	BatchCreate                  bool // Batch API create functionality
	BatchList                    bool // Batch API list functionality
	BatchRetrieve                bool // Batch API retrieve functionality
	BatchCancel                  bool // Batch API cancel functionality
	BatchResults                 bool // Batch API results functionality
	FileUpload                   bool // File API upload functionality
	FileList                     bool // File API list functionality
	FileRetrieve                 bool // File API retrieve functionality
	FileDelete                   bool // File API delete functionality
	FileContent                  bool // File API content download functionality
	FileBatchInput               bool // Whether batch create supports file-based input (InputFileID)
	CountTokens                  bool // Count tokens functionality
	ChatAudio                    bool // Chat completion with audio input/output functionality
	StructuredOutputs            bool // Structured outputs (JSON schema) functionality
	WebSearchTool                bool // Web search tool functionality
	ContainerCreate              bool // Container API create functionality
	ContainerList                bool // Container API list functionality
	ContainerRetrieve            bool // Container API retrieve functionality
	ContainerDelete              bool // Container API delete functionality
	ContainerFileCreate          bool // Container File API create functionality
	ContainerFileList            bool // Container File API list functionality
	ContainerFileRetrieve        bool // Container File API retrieve functionality
	ContainerFileContent         bool // Container File API content functionality
	ContainerFileDelete          bool // Container File API delete functionality
	PassThroughExtraParams       bool // Pass through extra params functionality
	Rerank                       bool // Rerank functionality
	PassthroughAPI               bool // Raw HTTP passthrough API (Passthrough + PassthroughStream)
	WebSocketResponses           bool // WebSocket Responses API mode
	Realtime                     bool // Realtime API (bidirectional audio/text)
	Compaction                   bool // Server-side compaction (context management)
	InterleavedThinking          bool // Interleaved thinking between tool calls (beta)
	FastMode                     bool // Fast mode for Opus 4.6 (beta: research preview)
	EagerInputStreaming          bool // Anthropic fine-grained tool streaming coverage
	ServerToolsViaOpenAIEndpoint bool // Anthropic-style server tools through OpenAI-compatible endpoint
}

TestScenarios defines the comprehensive test scenarios

type TestSettings

// TestSettings controls test execution.
type TestSettings struct {
	// Whether retries are enabled.
	EnableRetries        bool
	// Maximum number of retries per message.
	MaxRetriesPerMessage int
	// Validation strictness; a ValidationLevel such as ValidationStrict,
	// ValidationModerate or ValidationLenient.
	ValidationStrength   ValidationLevel
}

TestSettings controls test execution

type TextCompletionRetryCondition

// TextCompletionRetryCondition defines an interface for checking if a text
// completion test operation should be retried.
type TextCompletionRetryCondition interface {
	// ShouldRetry inspects the text completion response, the error and the
	// retry context. presumably the bool is the retry decision and the string
	// is the reason — confirm.
	ShouldRetry(response *schemas.BifrostTextCompletionResponse, err *schemas.BifrostError, context TestRetryContext) (bool, string)
	// GetConditionName returns the name of the condition.
	GetConditionName() string
}

TextCompletionRetryCondition defines an interface for checking if a text completion test operation should be retried

type TextCompletionRetryConfig

// TextCompletionRetryConfig configures retry behavior for text completion test
// scenarios. Its Conditions are TextCompletionRetryCondition values.
type TextCompletionRetryConfig struct {
	MaxAttempts int                                              // Maximum retry attempts (including initial attempt)
	BaseDelay   time.Duration                                    // Base delay between retries
	MaxDelay    time.Duration                                    // Maximum delay between retries
	Conditions  []TextCompletionRetryCondition                   // Conditions that trigger retries
	OnRetry     func(attempt int, reason string, t *testing.T)   // Called before each retry
	OnFinalFail func(attempts int, finalErr error, t *testing.T) // Called on final failure
}

TextCompletionRetryConfig configures retry behavior for text completion test scenarios

type ToolCallExpectation

// ToolCallExpectation defines expectations for a specific tool call: the
// function name plus required, forbidden, typed and exact-valued arguments.
type ToolCallExpectation struct {
	FunctionName     string                 // Expected function name
	RequiredArgs     []string               // Arguments that must be present
	ForbiddenArgs    []string               // Arguments that should NOT be present
	ArgumentTypes    map[string]string      // Expected types for arguments ("string", "number", "boolean", "array", "object")
	ArgumentValues   map[string]interface{} // Specific expected values for arguments
	ValidateArgsJSON bool                   // Whether arguments should be valid JSON
}

ToolCallExpectation defines expectations for a specific tool call

type ToolCallInfo

// ToolCallInfo represents extracted tool call information for both API formats.
// It is the element type returned by ExtractChatToolCalls,
// ExtractResponsesToolCalls, ExtractToolCalls and the accumulator's
// GetFinalChatToolCalls / GetFinalResponsesToolCalls.
type ToolCallInfo struct {
	// Name of the called tool.
	Name      string
	// Tool call arguments as a string (presumably the raw JSON payload — confirm).
	Arguments string
	// Identifier of the tool call.
	ID        string
	Index     int // OpenAI tool_calls index (0, 1, 2, ...); -1 when not available
}

ToolCallInfo represents extracted tool call information for both API formats

func ExtractChatToolCalls

func ExtractChatToolCalls(response *schemas.BifrostChatResponse) []ToolCallInfo

ExtractChatToolCalls extracts tool call information from a BifrostChatResponse

func ExtractResponsesToolCalls

func ExtractResponsesToolCalls(response *schemas.BifrostResponsesResponse) []ToolCallInfo

ExtractResponsesToolCalls extracts tool call information from a BifrostResponsesResponse

func ExtractToolCalls

func ExtractToolCalls(response *schemas.BifrostResponse) []ToolCallInfo

type TranscriptionRetryCondition

// TranscriptionRetryCondition defines an interface for checking if a
// transcription test operation should be retried.
type TranscriptionRetryCondition interface {
	// ShouldRetry inspects the transcription response, the error and the
	// retry context. presumably the bool is the retry decision and the string
	// is the reason — confirm.
	ShouldRetry(response *schemas.BifrostTranscriptionResponse, err *schemas.BifrostError, context TestRetryContext) (bool, string)
	// GetConditionName returns the name of the condition.
	GetConditionName() string
}

TranscriptionRetryCondition defines an interface for checking if a transcription test operation should be retried

type TranscriptionRetryConfig

// TranscriptionRetryConfig configures retry behavior for transcription test
// scenarios. Its Conditions are TranscriptionRetryCondition values.
type TranscriptionRetryConfig struct {
	MaxAttempts int                                              // Maximum retry attempts (including initial attempt)
	BaseDelay   time.Duration                                    // Base delay between retries
	MaxDelay    time.Duration                                    // Maximum delay between retries
	Conditions  []TranscriptionRetryCondition                    // Conditions that trigger retries
	OnRetry     func(attempt int, reason string, t *testing.T)   // Called before each retry
	OnFinalFail func(attempts int, finalErr error, t *testing.T) // Called on final failure
}

TranscriptionRetryConfig configures retry behavior for transcription test scenarios

type ValidationLevel

// ValidationLevel defines how strict the evaluation should be.
// It is a string type; the package declares the values ValidationStrict,
// ValidationModerate and ValidationLenient.
type ValidationLevel string

ValidationLevel defines how strict the evaluation should be

// ValidationLevel values, listed from most to least strict going by their names.
const (
	ValidationStrict   ValidationLevel = "strict"
	ValidationModerate ValidationLevel = "moderate"
	ValidationLenient  ValidationLevel = "lenient"
)

type ValidationResult

// ValidationResult contains the results of response validation.
// It is the type returned by the Validate*Response functions of this package.
type ValidationResult struct {
	Passed           bool                   // Overall validation result
	Errors           []string               // List of validation errors
	Warnings         []string               // List of validation warnings
	MetricsCollected map[string]interface{} // Collected metrics for analysis
}

ValidationResult contains the results of response validation

func ValidateBatchCancelResponse

func ValidateBatchCancelResponse(t *testing.T, response *schemas.BifrostBatchCancelResponse, err *schemas.BifrostError, expectations ResponseExpectations, scenarioName string) ValidationResult

ValidateBatchCancelResponse performs comprehensive validation for batch cancel responses

func ValidateBatchCreateResponse

func ValidateBatchCreateResponse(t *testing.T, response *schemas.BifrostBatchCreateResponse, err *schemas.BifrostError, expectations ResponseExpectations, scenarioName string) ValidationResult

ValidateBatchCreateResponse performs comprehensive validation for batch create responses

func ValidateBatchListResponse

func ValidateBatchListResponse(t *testing.T, response *schemas.BifrostBatchListResponse, err *schemas.BifrostError, expectations ResponseExpectations, scenarioName string) ValidationResult

ValidateBatchListResponse performs comprehensive validation for batch list responses

func ValidateBatchResultsResponse

func ValidateBatchResultsResponse(t *testing.T, response *schemas.BifrostBatchResultsResponse, err *schemas.BifrostError, expectations ResponseExpectations, scenarioName string) ValidationResult

ValidateBatchResultsResponse performs comprehensive validation for batch results responses

func ValidateBatchRetrieveResponse

func ValidateBatchRetrieveResponse(t *testing.T, response *schemas.BifrostBatchRetrieveResponse, err *schemas.BifrostError, expectations ResponseExpectations, scenarioName string) ValidationResult

ValidateBatchRetrieveResponse performs comprehensive validation for batch retrieve responses

func ValidateChatResponse

func ValidateChatResponse(t *testing.T, response *schemas.BifrostChatResponse, err *schemas.BifrostError, expectations ResponseExpectations, scenarioName string) ValidationResult

ValidateChatResponse performs comprehensive validation for chat completion responses

func ValidateCountTokensResponse

func ValidateCountTokensResponse(t *testing.T, response *schemas.BifrostCountTokensResponse, err *schemas.BifrostError, expectations ResponseExpectations, scenarioName string) ValidationResult

ValidateCountTokensResponse performs comprehensive validation for count tokens responses

func ValidateEmbeddingResponse

func ValidateEmbeddingResponse(t *testing.T, response *schemas.BifrostEmbeddingResponse, err *schemas.BifrostError, expectations ResponseExpectations, scenarioName string) ValidationResult

ValidateEmbeddingResponse performs comprehensive validation for embedding responses

func ValidateFileContentResponse

func ValidateFileContentResponse(t *testing.T, response *schemas.BifrostFileContentResponse, err *schemas.BifrostError, expectations ResponseExpectations, scenarioName string) ValidationResult

ValidateFileContentResponse performs comprehensive validation for file content responses

func ValidateFileDeleteResponse

func ValidateFileDeleteResponse(t *testing.T, response *schemas.BifrostFileDeleteResponse, err *schemas.BifrostError, expectations ResponseExpectations, scenarioName string) ValidationResult

ValidateFileDeleteResponse performs comprehensive validation for file delete responses

func ValidateFileListResponse

func ValidateFileListResponse(t *testing.T, response *schemas.BifrostFileListResponse, err *schemas.BifrostError, expectations ResponseExpectations, scenarioName string) ValidationResult

ValidateFileListResponse performs comprehensive validation for file list responses

func ValidateFileRetrieveResponse

func ValidateFileRetrieveResponse(t *testing.T, response *schemas.BifrostFileRetrieveResponse, err *schemas.BifrostError, expectations ResponseExpectations, scenarioName string) ValidationResult

ValidateFileRetrieveResponse performs comprehensive validation for file retrieve responses

func ValidateFileUploadResponse

func ValidateFileUploadResponse(t *testing.T, response *schemas.BifrostFileUploadResponse, err *schemas.BifrostError, expectations ResponseExpectations, scenarioName string) ValidationResult

ValidateFileUploadResponse performs comprehensive validation for file upload responses

func ValidateImageGenerationResponse

func ValidateImageGenerationResponse(t *testing.T, response *schemas.BifrostImageGenerationResponse, err *schemas.BifrostError, expectations ResponseExpectations, scenarioName string) ValidationResult

ValidateImageGenerationResponse performs comprehensive validation for image generation responses

func ValidateListModelsResponse

func ValidateListModelsResponse(t *testing.T, response *schemas.BifrostListModelsResponse, err *schemas.BifrostError, expectations ResponseExpectations, scenarioName string) ValidationResult

ValidateListModelsResponse performs comprehensive validation for list models responses

func ValidateResponsesResponse

func ValidateResponsesResponse(t *testing.T, response *schemas.BifrostResponsesResponse, err *schemas.BifrostError, expectations ResponseExpectations, scenarioName string) ValidationResult

ValidateResponsesResponse performs comprehensive validation for Responses API responses

func ValidateSpeechResponse

func ValidateSpeechResponse(t *testing.T, response *schemas.BifrostSpeechResponse, err *schemas.BifrostError, expectations ResponseExpectations, scenarioName string) ValidationResult

ValidateSpeechResponse performs comprehensive validation for speech synthesis responses

func ValidateTextCompletionResponse

func ValidateTextCompletionResponse(t *testing.T, response *schemas.BifrostTextCompletionResponse, err *schemas.BifrostError, expectations ResponseExpectations, scenarioName string) ValidationResult

ValidateTextCompletionResponse performs comprehensive validation for text completion responses

func ValidateTranscriptionResponse

func ValidateTranscriptionResponse(t *testing.T, response *schemas.BifrostTranscriptionResponse, err *schemas.BifrostError, expectations ResponseExpectations, scenarioName string) ValidationResult

ValidateTranscriptionResponse performs comprehensive validation for transcription responses

type WrongToolCalledCondition

// WrongToolCalledCondition checks if the wrong tool was called.
type WrongToolCalledCondition struct {
	// Name of the tool that is expected to be called.
	ExpectedToolName string
	ForbiddenTools   []string // Tools that should not be called
}

WrongToolCalledCondition checks if the wrong tool was called

func (*WrongToolCalledCondition) GetConditionName

func (c *WrongToolCalledCondition) GetConditionName() string

func (*WrongToolCalledCondition) ShouldRetry

type WrongToolSequenceCondition

// WrongToolSequenceCondition checks if tools were called in wrong order.
type WrongToolSequenceCondition struct {
	ExpectedTools []string // Expected sequence of tool names
}

WrongToolSequenceCondition checks if tools were called in wrong order

func (*WrongToolSequenceCondition) GetConditionName

func (c *WrongToolSequenceCondition) GetConditionName() string

func (*WrongToolSequenceCondition) ShouldRetry

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL