assertions

package
v1.1.9 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 24, 2026 License: Apache-2.0 Imports: 13 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func NewArenaAssertionRegistry

func NewArenaAssertionRegistry() *runtimeValidators.Registry

NewArenaAssertionRegistry creates a new registry with arena-specific assertion validators

func NewAudioDurationValidator added in v1.1.0

func NewAudioDurationValidator(params map[string]interface{}) runtimeValidators.Validator

NewAudioDurationValidator creates a new audio_duration validator from params

func NewAudioFormatValidator added in v1.1.0

func NewAudioFormatValidator(params map[string]interface{}) runtimeValidators.Validator

NewAudioFormatValidator creates a new audio_format validator from params

func NewContentIncludesValidator

func NewContentIncludesValidator(params map[string]interface{}) runtimeValidators.Validator

NewContentIncludesValidator creates a new content_includes validator from params

func NewContentMatchesValidator

func NewContentMatchesValidator(params map[string]interface{}) runtimeValidators.Validator

NewContentMatchesValidator creates a new content_matches validator from params

func NewGuardrailTriggeredValidator added in v1.1.0

func NewGuardrailTriggeredValidator(params map[string]interface{}) runtimeValidators.Validator

NewGuardrailTriggeredValidator creates a new GuardrailTriggeredValidator instance

func NewImageDimensionsValidator added in v1.1.0

func NewImageDimensionsValidator(params map[string]interface{}) runtimeValidators.Validator

NewImageDimensionsValidator creates a new image_dimensions validator from params

func NewImageFormatValidator added in v1.1.0

func NewImageFormatValidator(params map[string]interface{}) runtimeValidators.Validator

NewImageFormatValidator creates a new image_format validator from params

func NewIsValidJSONValidator added in v1.1.1

func NewIsValidJSONValidator(params map[string]interface{}) runtimeValidators.Validator

NewIsValidJSONValidator creates a new is_valid_json validator

func NewJSONPathValidator added in v1.1.1

func NewJSONPathValidator(params map[string]interface{}) runtimeValidators.Validator

NewJSONPathValidator creates a new json_path validator

func NewJSONSchemaValidator added in v1.1.1

func NewJSONSchemaValidator(params map[string]interface{}) runtimeValidators.Validator

NewJSONSchemaValidator creates a new json_schema validator

func NewLLMJudgeValidator added in v1.1.3

func NewLLMJudgeValidator(params map[string]interface{}) runtimeValidators.Validator

NewLLMJudgeValidator creates a validator that evaluates a single assistant response via an LLM judge.

Params:
  - criteria (string, required) or rubric (string)
  - judge (string, optional): name from metadata judge_targets
  - temperature, max_tokens (optional): settings for the judge call
  - conversation_aware (bool): whether to include prior messages

Requires metadata to carry judge_targets (map[string]providers.ProviderSpec).

func NewToolsCalledValidator

func NewToolsCalledValidator(params map[string]interface{}) runtimeValidators.Validator

NewToolsCalledValidator creates a new tools_called validator from params

func NewToolsNotCalledValidator

func NewToolsNotCalledValidator(params map[string]interface{}) runtimeValidators.Validator

NewToolsNotCalledValidator creates a new tools_not_called validator from params

func NewVideoDurationValidator added in v1.1.0

func NewVideoDurationValidator(params map[string]interface{}) runtimeValidators.Validator

NewVideoDurationValidator creates a new video_duration validator from params

func NewVideoResolutionValidator added in v1.1.0

func NewVideoResolutionValidator(params map[string]interface{}) runtimeValidators.Validator

NewVideoResolutionValidator creates a new video_resolution validator from params

Types

type AssertionConfig added in v1.1.0

// AssertionConfig is the arena-level configuration for a single assertion.
// It carries a type name, a free-form parameter map, and an optional
// human-readable message, and can be decoded from either JSON or YAML.
// Use ToValidatorConfig to obtain the runtime ValidatorConfig form.
//
// NOTE(review): Type presumably names the validator to run and Params holds
// that validator's settings (as on ConversationAssertion) — confirm against
// runtimeValidators.ValidatorConfig.
type AssertionConfig struct {
	Type    string                 `json:"type" yaml:"type"`
	Params  map[string]interface{} `json:"params" yaml:"params"`
	Message string                 `json:"message,omitempty" yaml:"message,omitempty"` // Human-readable description of what this assertion checks
}

AssertionConfig extends ValidatorConfig with arena-specific fields

func (AssertionConfig) ToValidatorConfig added in v1.1.0

func (a AssertionConfig) ToValidatorConfig() runtimeValidators.ValidatorConfig

ToValidatorConfig converts AssertionConfig to runtime ValidatorConfig

type AssertionResult added in v1.1.0

// AssertionResult is the outcome of one assertion: whether it passed, any
// validator-specific details, and the human-readable message taken from the
// assertion's config. Details and Message are dropped from the JSON output
// when they are empty.
type AssertionResult struct {
	Passed  bool        `json:"passed"`
	Details interface{} `json:"details,omitempty"`
	Message string      `json:"message,omitempty"` // Human-readable description from config
}

AssertionResult extends ValidationResult with the assertion message

func FromValidationResult added in v1.1.0

func FromValidationResult(vr runtimeValidators.ValidationResult, message string) AssertionResult

FromValidationResult creates an AssertionResult from a ValidationResult

type AudioDurationValidator added in v1.1.0

type AudioDurationValidator struct {
	// contains filtered or unexported fields
}

AudioDurationValidator checks that audio duration is within range

func (*AudioDurationValidator) Validate added in v1.1.0

func (v *AudioDurationValidator) Validate(content string, params map[string]interface{}) runtimeValidators.ValidationResult

Validate checks if audio duration is within allowed range

type AudioFormatValidator added in v1.1.0

type AudioFormatValidator struct {
	// contains filtered or unexported fields
}

AudioFormatValidator checks that audio content has one of the allowed formats

func (*AudioFormatValidator) Validate added in v1.1.0

func (v *AudioFormatValidator) Validate(content string, params map[string]interface{}) runtimeValidators.ValidationResult

Validate checks if the message contains audio with allowed formats

type ContentIncludesAnyConversationValidator added in v1.1.3

type ContentIncludesAnyConversationValidator struct{}

ContentIncludesAnyConversationValidator ensures at least one assistant message contains any of the provided patterns.

Params:
  - patterns: []string
  - case_sensitive: bool (optional, default false)

Type: "content_includes_any"

func (*ContentIncludesAnyConversationValidator) Type added in v1.1.3

Type returns the validator type name.

func (*ContentIncludesAnyConversationValidator) ValidateConversation added in v1.1.3

func (v *ContentIncludesAnyConversationValidator) ValidateConversation(
	ctx context.Context,
	convCtx *ConversationContext,
	params map[string]interface{},
) ConversationValidationResult

ValidateConversation checks if any assistant response contains any pattern.

type ContentIncludesValidator

type ContentIncludesValidator struct {
	// contains filtered or unexported fields
}

ContentIncludesValidator checks that content includes all expected patterns

func (*ContentIncludesValidator) Validate

func (v *ContentIncludesValidator) Validate(content string, params map[string]interface{}) runtimeValidators.ValidationResult

Validate checks if all patterns are present in content (case-insensitive)

type ContentMatchesValidator

type ContentMatchesValidator struct {
	// contains filtered or unexported fields
}

ContentMatchesValidator checks that content matches a regex pattern

func (*ContentMatchesValidator) Validate

func (v *ContentMatchesValidator) Validate(content string, params map[string]interface{}) runtimeValidators.ValidationResult

Validate checks if content matches the regex pattern

type ContentNotIncludesConversationValidator added in v1.1.3

type ContentNotIncludesConversationValidator struct{}

ContentNotIncludesConversationValidator ensures assistant messages do NOT include any forbidden patterns.

Params:
  - patterns: []string
  - case_sensitive: bool (optional, default false)

Type: "content_not_includes"

func (*ContentNotIncludesConversationValidator) Type added in v1.1.3

Type returns the validator type name.

func (*ContentNotIncludesConversationValidator) ValidateConversation added in v1.1.3

func (v *ContentNotIncludesConversationValidator) ValidateConversation(
	ctx context.Context,
	convCtx *ConversationContext,
	params map[string]interface{},
) ConversationValidationResult

ValidateConversation scans assistant messages for forbidden substrings.

type ConversationAssertion added in v1.1.3

// ConversationAssertion configures one assertion that is evaluated over an
// entire conversation rather than a single turn. It can be decoded from
// either JSON or YAML. Unlike AssertionConfig, Message has no omitempty tag,
// so it is always present in serialized output.
type ConversationAssertion struct {
	Type    string                 `json:"type" yaml:"type"`       // Validator type (e.g., "tools_not_called_with_args")
	Params  map[string]interface{} `json:"params" yaml:"params"`   // Validator-specific parameters
	Message string                 `json:"message" yaml:"message"` // Human-readable description shown on failure
}

ConversationAssertion defines an assertion to evaluate across an entire conversation. Unlike turn-level assertions that check individual responses, conversation assertions evaluate patterns, behaviors, or constraints across all turns in a self-play scenario.

type ConversationAssertionFactory added in v1.1.3

type ConversationAssertionFactory func() ConversationValidator

ConversationAssertionFactory creates new evaluator instances for conversation assertions. Using factories allows evaluators to be stateless and thread-safe.

type ConversationAssertionRegistry added in v1.1.3

type ConversationAssertionRegistry struct {
	// contains filtered or unexported fields
}

ConversationAssertionRegistry manages available conversation-level assertions. Provides registration and lookup of evaluators by type name. Thread-safe for concurrent access.

func NewConversationAssertionRegistry added in v1.1.3

func NewConversationAssertionRegistry() *ConversationAssertionRegistry

NewConversationAssertionRegistry creates a new registry with built-in assertions. Returns a registry pre-populated with all standard conversation assertions.

func (*ConversationAssertionRegistry) Get added in v1.1.3

Get retrieves an evaluator by name, creating a new instance via its factory. Returns an error if the assertion type is not registered.

func (*ConversationAssertionRegistry) Has added in v1.1.3

Has checks if an assertion type is registered.

func (*ConversationAssertionRegistry) Register added in v1.1.3

Register adds an assertion factory to the registry. The name must match the Type() returned by evaluators created by the factory. Panics if name is empty or factory is nil.

func (*ConversationAssertionRegistry) Types added in v1.1.3

Types returns a list of all registered assertion type names. Useful for introspection and documentation.

func (*ConversationAssertionRegistry) ValidateConversation added in v1.1.3

ValidateConversation evaluates a single conversation-level assertion. Looks up the evaluator, instantiates it, and runs validation.

func (*ConversationAssertionRegistry) ValidateConversations added in v1.1.3

ValidateConversations evaluates multiple assertions against a conversation. Returns results for all assertions, continuing even if some fail.

type ConversationContext added in v1.1.3

// ConversationContext bundles everything a ConversationValidator receives:
// the full message history, the recorded tool calls, and execution metadata.
// The struct declares no JSON/YAML tags.
type ConversationContext struct {
	// AllTurns contains the complete conversation history in chronological order.
	// Includes all messages from all roles (system, user, assistant, tool).
	// ToolCallRecord.TurnIndex and ConversationViolation.TurnIndex index into
	// this slice.
	AllTurns []types.Message

	// ToolCalls contains all tool invocations with their results.
	// Ordered chronologically to allow sequential analysis.
	ToolCalls []ToolCallRecord

	// Metadata provides scenario/execution context for the conversation.
	Metadata ConversationMetadata
}

ConversationContext provides all data needed to evaluate conversation-level assertions. This aggregates the complete conversation history, tool usage, and metadata for comprehensive validation across multiple turns.

type ConversationMetadata added in v1.1.3

// ConversationMetadata describes the execution that produced a conversation:
// which scenario, persona, prompt configuration and provider were used, plus
// aggregate cost and token totals. Extras is the only field omitted from JSON
// when empty; every other field is always serialized.
type ConversationMetadata struct {
	ScenarioID     string                 `json:"scenario_id"`      // The scenario being tested
	PersonaID      string                 `json:"persona_id"`       // Persona used for self-play (if any)
	Variables      map[string]interface{} `json:"variables"`        // Variables passed to prompts
	PromptConfigID string                 `json:"prompt_config_id"` // Which prompt configuration was used
	ProviderID     string                 `json:"provider_id"`      // Which LLM provider was used
	TotalCost      float64                `json:"total_cost"`       // Total cost in USD across all turns
	TotalTokens    int                    `json:"total_tokens"`     // Total tokens used (input + output)
	Extras         map[string]interface{} `json:"extras,omitempty"` // Additional metadata (e.g., judge targets/defaults)
}

ConversationMetadata provides context about the conversation execution. Useful for conditional validation based on scenario characteristics.

type ConversationValidationResult added in v1.1.3

// ConversationValidationResult is what a ConversationValidator returns: a
// pass/fail flag, an explanatory message, optional structured details, and
// optional per-violation evidence. Type, Details and Violations are omitted
// from JSON when empty; Passed and Message are always serialized.
type ConversationValidationResult struct {
	Type    string                 `json:"type,omitempty"`    // Validator type (e.g., tools_not_called_with_args)
	Passed  bool                   `json:"passed"`            // Whether the assertion passed
	Message string                 `json:"message"`           // Human-readable result explanation
	Details map[string]interface{} `json:"details,omitempty"` // Structured details for debugging

	// For aggregated assertions (e.g., checking all turns), evidence of individual violations.
	// Helps users understand exactly which turns or actions failed the assertion.
	Violations []ConversationViolation `json:"violations,omitempty"`
}

ConversationValidationResult contains the outcome of a conversation-level assertion. Provides structured details for debugging and reporting when assertions fail.

type ConversationValidator added in v1.1.3

// ConversationValidator is the interface implemented by every
// conversation-level assertion. An implementation reports its type name and
// evaluates a whole conversation in a single call.
type ConversationValidator interface {
	// Type returns the validator name (e.g., "tools_not_called_with_args").
	// Must match the type specified in ConversationAssertion configs.
	Type() string

	// ValidateConversation evaluates the assertion against the full conversation.
	// Returns a result indicating success/failure with detailed evidence.
	//
	// ctx is the caller's context, convCtx supplies the conversation history,
	// tool calls and metadata, and params carries the validator-specific
	// parameters (presumably ConversationAssertion.Params — confirm at the
	// call site).
	ValidateConversation(
		ctx context.Context,
		convCtx *ConversationContext,
		params map[string]interface{},
	) ConversationValidationResult
}

ConversationValidator evaluates assertions across entire conversations. Implementations check patterns, constraints, or behaviors that span multiple turns, such as "no forbidden tool arguments used" or "consistent behavior maintained".

func NewContentIncludesAnyConversationValidator added in v1.1.3

func NewContentIncludesAnyConversationValidator() ConversationValidator

NewContentIncludesAnyConversationValidator constructs a validator instance.

func NewContentNotIncludesConversationValidator added in v1.1.3

func NewContentNotIncludesConversationValidator() ConversationValidator

NewContentNotIncludesConversationValidator constructs a validator instance.

func NewLLMJudgeConversationValidator added in v1.1.3

func NewLLMJudgeConversationValidator() ConversationValidator

NewLLMJudgeConversationValidator creates a conversation-level LLM judge validator. Params include criteria/rubric, optional judge name (from metadata judge_targets), and min_score.

func NewToolCallsWithArgsConversationValidator added in v1.1.3

func NewToolCallsWithArgsConversationValidator() ConversationValidator

NewToolCallsWithArgsConversationValidator constructs a validator instance.

func NewToolsCalledConversationValidator added in v1.1.3

func NewToolsCalledConversationValidator() ConversationValidator

NewToolsCalledConversationValidator constructs a conversation-level tools_called validator.

func NewToolsNotCalledConversationValidator added in v1.1.3

func NewToolsNotCalledConversationValidator() ConversationValidator

NewToolsNotCalledConversationValidator constructs a conversation-level tools_not_called validator.

func NewToolsNotCalledWithArgsConversationValidator added in v1.1.3

func NewToolsNotCalledWithArgsConversationValidator() ConversationValidator

NewToolsNotCalledWithArgsConversationValidator constructs a validator instance.

type ConversationViolation added in v1.1.3

// ConversationViolation records one place in a conversation where an
// assertion was violated: the turn, a description, and optional evidence.
//
// NOTE(review): Timestamp is a time.Time struct value, and encoding/json's
// omitempty option never omits struct values, so a zero Timestamp is still
// emitted (as "0001-01-01T00:00:00Z") despite the tag. Confirm whether that
// is intended before relying on the field being absent.
type ConversationViolation struct {
	TurnIndex   int                    `json:"turn_index"`          // Which turn (index in AllTurns) had the violation
	Description string                 `json:"description"`         // What was violated (human-readable)
	Evidence    map[string]interface{} `json:"evidence,omitempty"`  // Data supporting the violation (e.g., actual values)
	Timestamp   time.Time              `json:"timestamp,omitempty"` // When the violation occurred (if available)
}

ConversationViolation represents a single assertion violation within the conversation. Captures exactly where and how the assertion was violated for precise debugging.

type GuardrailTriggeredValidator added in v1.1.0

type GuardrailTriggeredValidator struct{}

GuardrailTriggeredValidator is an assertion validator that checks if a specific guardrail validator triggered (or didn't trigger) as expected. This is useful for testing that guardrails work correctly in PromptArena scenarios.

func (*GuardrailTriggeredValidator) Validate added in v1.1.0

func (v *GuardrailTriggeredValidator) Validate(
	content string,
	params map[string]interface{},
) runtimeValidators.ValidationResult

Validate checks whether the specified guardrail validator triggered (or did not trigger) as expected

type ImageDimensionsValidator added in v1.1.0

type ImageDimensionsValidator struct {
	// contains filtered or unexported fields
}

ImageDimensionsValidator checks that images meet dimension requirements

func (*ImageDimensionsValidator) Validate added in v1.1.0

func (v *ImageDimensionsValidator) Validate(content string, params map[string]interface{}) runtimeValidators.ValidationResult

Validate checks if images meet dimension requirements

type ImageFormatValidator added in v1.1.0

type ImageFormatValidator struct {
	// contains filtered or unexported fields
}

ImageFormatValidator checks that media content has one of the allowed image formats

func (*ImageFormatValidator) Validate added in v1.1.0

func (v *ImageFormatValidator) Validate(content string, params map[string]interface{}) runtimeValidators.ValidationResult

Validate checks if the message contains images with allowed formats

type IsValidJSONValidator added in v1.1.1

type IsValidJSONValidator struct {
	// contains filtered or unexported fields
}

IsValidJSONValidator validates that content is parseable JSON

func (*IsValidJSONValidator) Validate added in v1.1.1

func (v *IsValidJSONValidator) Validate(
	content string,
	params map[string]interface{},
) runtimeValidators.ValidationResult

Validate checks if content is valid JSON

type JSONPathValidator added in v1.1.1

type JSONPathValidator struct {
	// contains filtered or unexported fields
}

JSONPathValidator validates JSON using JMESPath expressions

func (*JSONPathValidator) Validate added in v1.1.1

func (v *JSONPathValidator) Validate(content string, params map[string]interface{}) runtimeValidators.ValidationResult

Validate executes JMESPath expression and validates result

type JSONSchemaValidator added in v1.1.1

type JSONSchemaValidator struct {
	// contains filtered or unexported fields
}

JSONSchemaValidator validates JSON against a JSON Schema

func (*JSONSchemaValidator) Validate added in v1.1.1

func (v *JSONSchemaValidator) Validate(
	content string,
	params map[string]interface{},
) runtimeValidators.ValidationResult

Validate checks if JSON content matches the schema

type LengthCapValidator

type LengthCapValidator struct{}

LengthCapValidator ensures user messages are at most 2 sentences

func NewLengthCapValidator

func NewLengthCapValidator() *LengthCapValidator

NewLengthCapValidator creates a new length cap validator

func (*LengthCapValidator) SupportsStreaming

func (v *LengthCapValidator) SupportsStreaming() bool

SupportsStreaming returns false as sentence counting requires complete content

func (*LengthCapValidator) Validate

func (v *LengthCapValidator) Validate(content string, params map[string]interface{}) ValidationResult

Validate checks for sentence count violations

type QuestionCapValidator

type QuestionCapValidator struct{}

QuestionCapValidator ensures user messages have at most one question

func NewQuestionCapValidator

func NewQuestionCapValidator() *QuestionCapValidator

NewQuestionCapValidator creates a new question cap validator

func (*QuestionCapValidator) SupportsStreaming

func (v *QuestionCapValidator) SupportsStreaming() bool

SupportsStreaming returns false as question counting requires complete content

func (*QuestionCapValidator) Validate

func (v *QuestionCapValidator) Validate(content string, params map[string]interface{}) ValidationResult

Validate checks for question count violations

type RoleIntegrityValidator

type RoleIntegrityValidator struct{}

RoleIntegrityValidator checks that user LLM doesn't provide assistant-like responses

func NewRoleIntegrityValidator

func NewRoleIntegrityValidator() *RoleIntegrityValidator

NewRoleIntegrityValidator creates a new role integrity validator

func (*RoleIntegrityValidator) SupportsStreaming

func (v *RoleIntegrityValidator) SupportsStreaming() bool

SupportsStreaming returns false as role integrity requires complete content

func (*RoleIntegrityValidator) Validate

func (v *RoleIntegrityValidator) Validate(content string, params map[string]interface{}) ValidationResult

Validate checks for role integrity violations

type ToolCallRecord added in v1.1.3

// ToolCallRecord captures one tool invocation: where in the conversation it
// happened, what was called with which arguments, and what came back. No
// field uses omitempty, so all six keys are always present in JSON output.
//
// NOTE(review): Duration is a time.Duration, which encoding/json serializes
// as a plain integer count of nanoseconds — confirm consumers expect that
// unit.
type ToolCallRecord struct {
	TurnIndex int                    `json:"turn_index"` // Which turn (index in AllTurns) this call occurred in
	ToolName  string                 `json:"tool_name"`  // Name of the tool called
	Arguments map[string]interface{} `json:"arguments"`  // Arguments passed to the tool
	Result    interface{}            `json:"result"`     // Tool's return value (nil if tool failed)
	Error     string                 `json:"error"`      // Error message if tool failed (empty on success)
	Duration  time.Duration          `json:"duration"`   // How long the tool took to execute
}

ToolCallRecord captures a single tool invocation for assertion checking. Records both the invocation parameters and the result for comprehensive validation.

type ToolCallsWithArgsConversationValidator added in v1.1.3

type ToolCallsWithArgsConversationValidator struct{}

ToolCallsWithArgsConversationValidator ensures all calls to a specific tool include required arguments with expected values.

Params:
  - tool_name: string
  - required_args: map[string]interface{} of expected values; if a value is nil, only presence is required

Type: "tool_calls_with_args"

func (*ToolCallsWithArgsConversationValidator) Type added in v1.1.3

Type returns the validator type name.

func (*ToolCallsWithArgsConversationValidator) ValidateConversation added in v1.1.3

func (v *ToolCallsWithArgsConversationValidator) ValidateConversation(
	ctx context.Context,
	convCtx *ConversationContext,
	params map[string]interface{},
) ConversationValidationResult

ValidateConversation checks all calls for required args and values.

type ToolsCalledConversationValidator added in v1.1.3

type ToolsCalledConversationValidator struct{}

ToolsCalledConversationValidator checks that specific tools were called at least a minimum number of times across the full conversation.

Params:
  - tool_names: []string, the required tools
  - min_calls: int, optional (default 1), the minimum calls per tool

Type: "tools_called"

func (*ToolsCalledConversationValidator) Type added in v1.1.3

Type returns the validator type name.

func (*ToolsCalledConversationValidator) ValidateConversation added in v1.1.3

func (v *ToolsCalledConversationValidator) ValidateConversation(
	ctx context.Context,
	convCtx *ConversationContext,
	params map[string]interface{},
) ConversationValidationResult

ValidateConversation evaluates whether all required tools were called at least the minimum number of times across the conversation.

type ToolsCalledValidator

type ToolsCalledValidator struct {
	// contains filtered or unexported fields
}

ToolsCalledValidator checks that expected tools were called in the response

func (*ToolsCalledValidator) Validate

func (v *ToolsCalledValidator) Validate(content string, params map[string]interface{}) runtimeValidators.ValidationResult

Validate checks if expected tools were called

type ToolsNotCalledConversationValidator added in v1.1.3

type ToolsNotCalledConversationValidator struct{}

ToolsNotCalledConversationValidator checks that specific tools were NOT called anywhere in the conversation.

Params:
  - tool_names: []string, the forbidden tools

Type: "tools_not_called"

func (*ToolsNotCalledConversationValidator) Type added in v1.1.3

Type returns the validator type name.

func (*ToolsNotCalledConversationValidator) ValidateConversation added in v1.1.3

func (v *ToolsNotCalledConversationValidator) ValidateConversation(
	ctx context.Context,
	convCtx *ConversationContext,
	params map[string]interface{},
) ConversationValidationResult

ValidateConversation ensures forbidden tools were never called across the conversation.

type ToolsNotCalledValidator

type ToolsNotCalledValidator struct {
	// contains filtered or unexported fields
}

ToolsNotCalledValidator checks that forbidden tools were NOT called in the response

func (*ToolsNotCalledValidator) Validate

func (v *ToolsNotCalledValidator) Validate(content string, params map[string]interface{}) runtimeValidators.ValidationResult

Validate checks if any forbidden tools were called

type ToolsNotCalledWithArgsConversationValidator added in v1.1.3

type ToolsNotCalledWithArgsConversationValidator struct{}

ToolsNotCalledWithArgsConversationValidator ensures a given tool was never called with any of the forbidden argument values.

Params:
  - tool_name: string
  - forbidden_args: map[string][]interface{}, where each key is an argument name and each value is the list of forbidden values

Type: "tools_not_called_with_args"

func (*ToolsNotCalledWithArgsConversationValidator) Type added in v1.1.3

Type returns the validator type name.

func (*ToolsNotCalledWithArgsConversationValidator) ValidateConversation added in v1.1.3

func (v *ToolsNotCalledWithArgsConversationValidator) ValidateConversation(
	ctx context.Context,
	convCtx *ConversationContext,
	params map[string]interface{},
) ConversationValidationResult

ValidateConversation checks all tool calls for forbidden argument values.

type ValidationResult

type ValidationResult = validators.ValidationResult

ValidationResult is an alias for runtime validators.ValidationResult

type VideoDurationValidator added in v1.1.0

type VideoDurationValidator struct {
	// contains filtered or unexported fields
}

VideoDurationValidator checks that video duration is within range

func (*VideoDurationValidator) Validate added in v1.1.0

func (v *VideoDurationValidator) Validate(content string, params map[string]interface{}) runtimeValidators.ValidationResult

Validate checks if video duration is within allowed range

type VideoResolutionValidator added in v1.1.0

type VideoResolutionValidator struct {
	// contains filtered or unexported fields
}

VideoResolutionValidator checks that video resolution meets requirements

func (*VideoResolutionValidator) Validate added in v1.1.0

func (v *VideoResolutionValidator) Validate(content string, params map[string]interface{}) runtimeValidators.ValidationResult

Validate checks if video resolution meets requirements

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL