handlers

package

Version: v1.3.9 (latest) | Published: Mar 4, 2026 | License: Apache-2.0 | Imports: 17 | Imported by: 6

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/AltairaLabs/PromptKit

Links

Open Source Insights

Documentation ¶

Overview ¶

Package handlers provides eval type handler implementations.

Index ¶

type A2AEvalHandler
- func (h *A2AEvalHandler) Eval(ctx context.Context, evalCtx *evals.EvalContext, params map[string]any) (*evals.EvalResult, error)
- func (h *A2AEvalHandler) Type() string
type A2AEvalSessionHandler
- func (h *A2AEvalSessionHandler) Eval(ctx context.Context, evalCtx *evals.EvalContext, params map[string]any) (*evals.EvalResult, error)
- func (h *A2AEvalSessionHandler) Type() string
type AgentInvokedHandler
- func (h *AgentInvokedHandler) Eval(_ context.Context, evalCtx *evals.EvalContext, params map[string]any) (*evals.EvalResult, error)
- func (h *AgentInvokedHandler) Type() string
type AgentNotInvokedHandler
- func (h *AgentNotInvokedHandler) Eval(_ context.Context, evalCtx *evals.EvalContext, params map[string]any) (*evals.EvalResult, error)
- func (h *AgentNotInvokedHandler) Type() string
type AgentResponseContainsHandler
- func (h *AgentResponseContainsHandler) Eval(_ context.Context, evalCtx *evals.EvalContext, params map[string]any) (*evals.EvalResult, error)
- func (h *AgentResponseContainsHandler) Type() string
type AudioDurationHandler
- func (h *AudioDurationHandler) Eval(_ context.Context, evalCtx *evals.EvalContext, params map[string]any) (*evals.EvalResult, error)
- func (h *AudioDurationHandler) Type() string
type AudioFormatHandler
- func (h *AudioFormatHandler) Eval(_ context.Context, evalCtx *evals.EvalContext, params map[string]any) (*evals.EvalResult, error)
- func (h *AudioFormatHandler) Type() string
type ContainsAnyHandler
- func (h *ContainsAnyHandler) Eval(ctx context.Context, evalCtx *evals.EvalContext, params map[string]any) (_ *evals.EvalResult, _ error)
- func (h *ContainsAnyHandler) Type() string
type ContainsHandler
- func (h *ContainsHandler) Eval(_ context.Context, evalCtx *evals.EvalContext, params map[string]any) (result *evals.EvalResult, err error)
- func (h *ContainsHandler) Type() string
type ContentExcludesHandler
- func (h *ContentExcludesHandler) Eval(ctx context.Context, evalCtx *evals.EvalContext, params map[string]any) (_ *evals.EvalResult, _ error)
- func (h *ContentExcludesHandler) Type() string
type CosineSimilarityHandler
- func (h *CosineSimilarityHandler) Eval(_ context.Context, evalCtx *evals.EvalContext, params map[string]any) (result *evals.EvalResult, err error)
- func (h *CosineSimilarityHandler) Type() string
type ExternalEvalRequest
type GuardrailTriggeredHandler
- func (h *GuardrailTriggeredHandler) Eval(_ context.Context, evalCtx *evals.EvalContext, params map[string]any) (*evals.EvalResult, error)
- func (h *GuardrailTriggeredHandler) Type() string
type ImageDimensionsHandler
- func (h *ImageDimensionsHandler) Eval(_ context.Context, evalCtx *evals.EvalContext, params map[string]any) (*evals.EvalResult, error)
- func (h *ImageDimensionsHandler) Type() string
type ImageFormatHandler
- func (h *ImageFormatHandler) Eval(_ context.Context, evalCtx *evals.EvalContext, params map[string]any) (*evals.EvalResult, error)
- func (h *ImageFormatHandler) Type() string
type JSONPathHandler
- func (h *JSONPathHandler) Eval(_ context.Context, evalCtx *evals.EvalContext, params map[string]any) (*evals.EvalResult, error)
- func (h *JSONPathHandler) Type() string
type JSONSchemaHandler
- func (h *JSONSchemaHandler) Eval(_ context.Context, evalCtx *evals.EvalContext, params map[string]any) (result *evals.EvalResult, err error)
- func (h *JSONSchemaHandler) Type() string
type JSONValidHandler
- func (h *JSONValidHandler) Eval(_ context.Context, evalCtx *evals.EvalContext, _ map[string]any) (result *evals.EvalResult, err error)
- func (h *JSONValidHandler) Type() string
type JudgeOpts
type JudgeProvider
type JudgeResult
type LLMJudgeHandler
- func (h *LLMJudgeHandler) Eval(ctx context.Context, evalCtx *evals.EvalContext, params map[string]any) (result *evals.EvalResult, err error)
- func (h *LLMJudgeHandler) Type() string
type LLMJudgeSessionHandler
- func (h *LLMJudgeSessionHandler) Eval(ctx context.Context, evalCtx *evals.EvalContext, params map[string]any) (result *evals.EvalResult, err error)
- func (h *LLMJudgeSessionHandler) Type() string
type LLMJudgeToolCallsHandler
- func (h *LLMJudgeToolCallsHandler) Eval(ctx context.Context, evalCtx *evals.EvalContext, params map[string]any) (*evals.EvalResult, error)
- func (h *LLMJudgeToolCallsHandler) Type() string
type LatencyBudgetHandler
- func (h *LatencyBudgetHandler) Eval(_ context.Context, evalCtx *evals.EvalContext, params map[string]any) (result *evals.EvalResult, err error)
- func (h *LatencyBudgetHandler) Type() string
type NoToolErrorsHandler
- func (h *NoToolErrorsHandler) Eval(_ context.Context, evalCtx *evals.EvalContext, params map[string]any) (*evals.EvalResult, error)
- func (h *NoToolErrorsHandler) Type() string
type RegexHandler
- func (h *RegexHandler) Eval(_ context.Context, evalCtx *evals.EvalContext, params map[string]any) (result *evals.EvalResult, err error)
- func (h *RegexHandler) Type() string
type RestEvalHandler
- func (h *RestEvalHandler) Eval(ctx context.Context, evalCtx *evals.EvalContext, params map[string]any) (*evals.EvalResult, error)
- func (h *RestEvalHandler) Type() string
type RestEvalSessionHandler
- func (h *RestEvalSessionHandler) Eval(ctx context.Context, evalCtx *evals.EvalContext, params map[string]any) (*evals.EvalResult, error)
- func (h *RestEvalSessionHandler) Type() string
type SkillActivatedHandler
- func (h *SkillActivatedHandler) Eval(_ context.Context, evalCtx *evals.EvalContext, params map[string]any) (*evals.EvalResult, error)
- func (h *SkillActivatedHandler) Type() string
type SkillNotActivatedHandler
- func (h *SkillNotActivatedHandler) Eval(_ context.Context, evalCtx *evals.EvalContext, params map[string]any) (*evals.EvalResult, error)
- func (h *SkillNotActivatedHandler) Type() string
type SpecJudgeProvider
- func NewSpecJudgeProvider(spec *providers.ProviderSpec) *SpecJudgeProvider
- func (sp *SpecJudgeProvider) Judge(ctx context.Context, opts JudgeOpts) (*JudgeResult, error)
type ToolArgsExcludedSessionHandler
- func (h *ToolArgsExcludedSessionHandler) Eval(ctx context.Context, evalCtx *evals.EvalContext, params map[string]any) (_ *evals.EvalResult, _ error)
- func (h *ToolArgsExcludedSessionHandler) Type() string
type ToolArgsHandler
- func (h *ToolArgsHandler) Eval(_ context.Context, evalCtx *evals.EvalContext, params map[string]any) (result *evals.EvalResult, err error)
- func (h *ToolArgsHandler) Type() string
type ToolArgsSessionHandler
- func (h *ToolArgsSessionHandler) Eval(ctx context.Context, evalCtx *evals.EvalContext, params map[string]any) (_ *evals.EvalResult, _ error)
- func (h *ToolArgsSessionHandler) Type() string
type ToolCallChainHandler
- func (h *ToolCallChainHandler) Eval(_ context.Context, evalCtx *evals.EvalContext, params map[string]any) (*evals.EvalResult, error)
- func (h *ToolCallChainHandler) Type() string
type ToolCallCountHandler
- func (h *ToolCallCountHandler) Eval(_ context.Context, evalCtx *evals.EvalContext, params map[string]any) (*evals.EvalResult, error)
- func (h *ToolCallCountHandler) Type() string
type ToolCallSequenceHandler
- func (h *ToolCallSequenceHandler) Eval(_ context.Context, evalCtx *evals.EvalContext, params map[string]any) (*evals.EvalResult, error)
- func (h *ToolCallSequenceHandler) Type() string
type ToolCallsWithArgsHandler
- func (h *ToolCallsWithArgsHandler) Eval(_ context.Context, evalCtx *evals.EvalContext, params map[string]any) (*evals.EvalResult, error)
- func (h *ToolCallsWithArgsHandler) Type() string
type ToolResultIncludesHandler
- func (h *ToolResultIncludesHandler) Eval(_ context.Context, evalCtx *evals.EvalContext, params map[string]any) (*evals.EvalResult, error)
- func (h *ToolResultIncludesHandler) Type() string
type ToolResultMatchesHandler
- func (h *ToolResultMatchesHandler) Eval(_ context.Context, evalCtx *evals.EvalContext, params map[string]any) (*evals.EvalResult, error)
- func (h *ToolResultMatchesHandler) Type() string
type ToolsCalledHandler
- func (h *ToolsCalledHandler) Eval(_ context.Context, evalCtx *evals.EvalContext, params map[string]any) (result *evals.EvalResult, err error)
- func (h *ToolsCalledHandler) Type() string
type ToolsCalledSessionHandler
- func (h *ToolsCalledSessionHandler) Eval(ctx context.Context, evalCtx *evals.EvalContext, params map[string]any) (_ *evals.EvalResult, _ error)
- func (h *ToolsCalledSessionHandler) Type() string
type ToolsNotCalledHandler
- func (h *ToolsNotCalledHandler) Eval(_ context.Context, evalCtx *evals.EvalContext, params map[string]any) (result *evals.EvalResult, err error)
- func (h *ToolsNotCalledHandler) Type() string
type ToolsNotCalledSessionHandler
- func (h *ToolsNotCalledSessionHandler) Eval(ctx context.Context, evalCtx *evals.EvalContext, params map[string]any) (_ *evals.EvalResult, _ error)
- func (h *ToolsNotCalledSessionHandler) Type() string
type VideoDurationHandler
- func (h *VideoDurationHandler) Eval(_ context.Context, evalCtx *evals.EvalContext, params map[string]any) (*evals.EvalResult, error)
- func (h *VideoDurationHandler) Type() string
type VideoResolutionHandler
- func (h *VideoResolutionHandler) Eval(_ context.Context, evalCtx *evals.EvalContext, params map[string]any) (*evals.EvalResult, error)
- func (h *VideoResolutionHandler) Type() string
type WorkflowCompleteHandler
- func (h *WorkflowCompleteHandler) Eval(_ context.Context, evalCtx *evals.EvalContext, _ map[string]any) (*evals.EvalResult, error)
- func (h *WorkflowCompleteHandler) Type() string
type WorkflowStateIsHandler
- func (h *WorkflowStateIsHandler) Eval(_ context.Context, evalCtx *evals.EvalContext, params map[string]any) (*evals.EvalResult, error)
- func (h *WorkflowStateIsHandler) Type() string
type WorkflowTransitionedToHandler
- func (h *WorkflowTransitionedToHandler) Eval(_ context.Context, evalCtx *evals.EvalContext, params map[string]any) (*evals.EvalResult, error)
- func (h *WorkflowTransitionedToHandler) Type() string

Constants ¶

This section is empty.

Variables ¶

This section is empty.

Functions ¶

This section is empty.

Types ¶

type A2AEvalHandler ¶ added in v1.3.3

type A2AEvalHandler struct{}

A2AEvalHandler evaluates a single assistant turn by sending conversation context to an A2A agent and interpreting the agent's response as a structured eval result.

Params:

agent_url (string, required): A2A agent endpoint URL
auth_token (string, optional): auth token, supports ${ENV_VAR}
timeout (string, optional): request timeout, default 60s
criteria (string, optional): evaluation criteria
include_messages (bool, optional): include conversation history, default true
include_tool_calls (bool, optional): include tool call records, default false
min_score (float64, optional): minimum score threshold
extra (map[string]any, optional): arbitrary data forwarded in request

func (*A2AEvalHandler) Eval ¶ added in v1.3.3

func (h *A2AEvalHandler) Eval(
	ctx context.Context,
	evalCtx *evals.EvalContext,
	params map[string]any,
) (*evals.EvalResult, error)

Eval sends the current assistant output to the configured A2A agent.

func (*A2AEvalHandler) Type ¶ added in v1.3.3

func (h *A2AEvalHandler) Type() string

Type returns the eval type identifier.

type A2AEvalSessionHandler ¶ added in v1.3.3

type A2AEvalSessionHandler struct{}

A2AEvalSessionHandler evaluates an entire conversation by sending all assistant messages to an A2A agent.

Params: same as A2AEvalHandler.

func (*A2AEvalSessionHandler) Eval ¶ added in v1.3.3

func (h *A2AEvalSessionHandler) Eval(
	ctx context.Context,
	evalCtx *evals.EvalContext,
	params map[string]any,
) (*evals.EvalResult, error)

Eval sends all assistant messages to the configured A2A agent.

func (*A2AEvalSessionHandler) Type ¶ added in v1.3.3

func (h *A2AEvalSessionHandler) Type() string

Type returns the eval type identifier.

type AgentInvokedHandler ¶ added in v1.3.2

type AgentInvokedHandler struct{}

AgentInvokedHandler checks that expected agents were invoked as tool calls. Params: agents []string — list of agent names that should have been called.

func (*AgentInvokedHandler) Eval ¶ added in v1.3.2

func (h *AgentInvokedHandler) Eval(
	_ context.Context,
	evalCtx *evals.EvalContext,
	params map[string]any,
) (*evals.EvalResult, error)

Eval checks if expected agents were invoked.

func (*AgentInvokedHandler) Type ¶ added in v1.3.2

func (h *AgentInvokedHandler) Type() string

Type returns the eval type identifier.

type AgentNotInvokedHandler ¶ added in v1.3.2

type AgentNotInvokedHandler struct{}

AgentNotInvokedHandler checks that forbidden agents were NOT called. Params: agents []string — agent names that should not have been called.

func (*AgentNotInvokedHandler) Eval ¶ added in v1.3.2

func (h *AgentNotInvokedHandler) Eval(
	_ context.Context,
	evalCtx *evals.EvalContext,
	params map[string]any,
) (*evals.EvalResult, error)

Eval checks if any forbidden agents were invoked.

func (*AgentNotInvokedHandler) Type ¶ added in v1.3.2

func (h *AgentNotInvokedHandler) Type() string

Type returns the eval type identifier.

type AgentResponseContainsHandler ¶ added in v1.3.2

type AgentResponseContainsHandler struct{}

AgentResponseContainsHandler checks that a specific agent's response contains expected text. Agent responses appear as tool-result messages where the tool name matches the agent name. Params: agent string, contains string.

func (*AgentResponseContainsHandler) Eval ¶ added in v1.3.2

func (h *AgentResponseContainsHandler) Eval(
	_ context.Context,
	evalCtx *evals.EvalContext,
	params map[string]any,
) (*evals.EvalResult, error)

Eval checks if the specified agent's tool result contains the expected text.

func (*AgentResponseContainsHandler) Type ¶ added in v1.3.2

func (h *AgentResponseContainsHandler) Type() string

Type returns the eval type identifier.

type AudioDurationHandler ¶ added in v1.3.2

type AudioDurationHandler struct{}

AudioDurationHandler checks that audio duration is within range. Params: min_seconds float64, max_seconds float64.

func (*AudioDurationHandler) Eval ¶ added in v1.3.2

func (h *AudioDurationHandler) Eval(
	_ context.Context,
	evalCtx *evals.EvalContext,
	params map[string]any,
) (*evals.EvalResult, error)

Eval checks audio duration constraints.

func (*AudioDurationHandler) Type ¶ added in v1.3.2

func (h *AudioDurationHandler) Type() string

Type returns the eval type identifier.

type AudioFormatHandler ¶ added in v1.3.2

type AudioFormatHandler struct{}

AudioFormatHandler checks that audio content has allowed formats. Params: formats []string.

func (*AudioFormatHandler) Eval ¶ added in v1.3.2

func (h *AudioFormatHandler) Eval(
	_ context.Context,
	evalCtx *evals.EvalContext,
	params map[string]any,
) (*evals.EvalResult, error)

Eval checks audio formats against the allowed list.

func (*AudioFormatHandler) Type ¶ added in v1.3.2

func (h *AudioFormatHandler) Type() string

Type returns the eval type identifier.

type ContainsAnyHandler ¶

type ContainsAnyHandler struct{}

ContainsAnyHandler checks that at least one assistant message contains at least one of the specified patterns. Params: patterns []string (case-insensitive matching).

func (*ContainsAnyHandler) Eval ¶

func (h *ContainsAnyHandler) Eval(
	ctx context.Context,
	evalCtx *evals.EvalContext,
	params map[string]any,
) (_ *evals.EvalResult, _ error)

Eval checks assistant messages for any matching pattern.

func (*ContainsAnyHandler) Type ¶

func (h *ContainsAnyHandler) Type() string

Type returns the eval type identifier.

type ContainsHandler ¶

type ContainsHandler struct{}

ContainsHandler checks if CurrentOutput contains all specified patterns (case-insensitive). Params: patterns []string.

func (*ContainsHandler) Eval ¶

func (h *ContainsHandler) Eval(
	_ context.Context,
	evalCtx *evals.EvalContext,
	params map[string]any,
) (result *evals.EvalResult, err error)

Eval checks that all patterns appear in the current output.

func (*ContainsHandler) Type ¶

func (h *ContainsHandler) Type() string

Type returns the eval type identifier.

type ContentExcludesHandler ¶

type ContentExcludesHandler struct{}

ContentExcludesHandler checks that NONE of the assistant messages across the full conversation contain any of the forbidden patterns. Params: patterns []string (case-insensitive matching).

func (*ContentExcludesHandler) Eval ¶

func (h *ContentExcludesHandler) Eval(
	ctx context.Context,
	evalCtx *evals.EvalContext,
	params map[string]any,
) (_ *evals.EvalResult, _ error)

Eval checks all assistant messages for forbidden patterns.

func (*ContentExcludesHandler) Type ¶

func (h *ContentExcludesHandler) Type() string

Type returns the eval type identifier.

type CosineSimilarityHandler ¶

type CosineSimilarityHandler struct{}

CosineSimilarityHandler computes the cosine similarity between a reference embedding and a target embedding and checks it against a minimum threshold. Params: reference []float64, min_similarity float64. The target embedding is read from Metadata["embedding"].

func (*CosineSimilarityHandler) Eval ¶

func (h *CosineSimilarityHandler) Eval(
	_ context.Context,
	evalCtx *evals.EvalContext,
	params map[string]any,
) (result *evals.EvalResult, err error)

Eval computes cosine similarity and checks against threshold.

func (*CosineSimilarityHandler) Type ¶

func (h *CosineSimilarityHandler) Type() string

Type returns the eval type identifier.

type ExternalEvalRequest ¶ added in v1.3.3

type ExternalEvalRequest struct {
	CurrentOutput string         `json:"current_output"`
	Messages      []messageView  `json:"messages,omitempty"`
	ToolCalls     []toolCallView `json:"tool_calls,omitempty"`
	Criteria      string         `json:"criteria,omitempty"`
	Variables     map[string]any `json:"variables,omitempty"`
	Extra         map[string]any `json:"extra,omitempty"`
}

ExternalEvalRequest is the standard request body sent to external eval endpoints (REST) and formatted as context for A2A eval agents.

type GuardrailTriggeredHandler ¶ added in v1.3.2

type GuardrailTriggeredHandler struct{}

GuardrailTriggeredHandler checks if a specific guardrail validator triggered (or didn't trigger) as expected. Params: validator_type string, should_trigger bool (default true).

func (*GuardrailTriggeredHandler) Eval ¶ added in v1.3.2

func (h *GuardrailTriggeredHandler) Eval(
	_ context.Context,
	evalCtx *evals.EvalContext,
	params map[string]any,
) (*evals.EvalResult, error)

Eval checks guardrail validation results from the last assistant message.

func (*GuardrailTriggeredHandler) Type ¶ added in v1.3.2

func (h *GuardrailTriggeredHandler) Type() string

Type returns the eval type identifier.

type ImageDimensionsHandler ¶ added in v1.3.2

type ImageDimensionsHandler struct{}

ImageDimensionsHandler checks that images meet dimension requirements. Params: min_width, max_width, min_height, max_height, width, height.

func (*ImageDimensionsHandler) Eval ¶ added in v1.3.2

func (h *ImageDimensionsHandler) Eval(
	_ context.Context,
	evalCtx *evals.EvalContext,
	params map[string]any,
) (*evals.EvalResult, error)

Eval checks image dimensions against constraints.

func (*ImageDimensionsHandler) Type ¶ added in v1.3.2

func (h *ImageDimensionsHandler) Type() string

Type returns the eval type identifier.

type ImageFormatHandler ¶ added in v1.3.2

type ImageFormatHandler struct{}

ImageFormatHandler checks that images in assistant messages have allowed formats. Params: formats []string.

func (*ImageFormatHandler) Eval ¶ added in v1.3.2

func (h *ImageFormatHandler) Eval(
	_ context.Context,
	evalCtx *evals.EvalContext,
	params map[string]any,
) (*evals.EvalResult, error)

Eval checks image formats against the allowed list.

func (*ImageFormatHandler) Type ¶ added in v1.3.2

func (h *ImageFormatHandler) Type() string

Type returns the eval type identifier.

type JSONPathHandler ¶ added in v1.3.2

type JSONPathHandler struct{}

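JSONPathHandler treats the assistant output as JSON and validates it with a JMESPath expression (note: the expression syntax is JMESPath, not JSONPath, despite the handler's name). Params: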

expression string (JMESPath expression)
expected any (optional: exact match)
contains []any (optional: array contains check)
min_results int, max_results int (optional: array length bounds)
min float64, max float64 (optional: numeric range)
allow_wrapped bool (optional: extract JSON from code blocks)
extract_json bool (optional: extract JSON from mixed text)

func (*JSONPathHandler) Eval ¶ added in v1.3.2

func (h *JSONPathHandler) Eval(
	_ context.Context,
	evalCtx *evals.EvalContext,
	params map[string]any,
) (*evals.EvalResult, error)

Eval executes a JMESPath expression on the assistant output and validates the result.

func (*JSONPathHandler) Type ¶ added in v1.3.2

func (h *JSONPathHandler) Type() string

Type returns the eval type identifier.

type JSONSchemaHandler ¶

type JSONSchemaHandler struct{}

JSONSchemaHandler validates CurrentOutput against a JSON schema. Params: schema map[string]any.

func (*JSONSchemaHandler) Eval ¶

func (h *JSONSchemaHandler) Eval(
	_ context.Context,
	evalCtx *evals.EvalContext,
	params map[string]any,
) (result *evals.EvalResult, err error)

Eval validates the current output against the provided JSON schema.

func (*JSONSchemaHandler) Type ¶

func (h *JSONSchemaHandler) Type() string

Type returns the eval type identifier.

type JSONValidHandler ¶

type JSONValidHandler struct{}

JSONValidHandler checks if CurrentOutput is valid JSON. No required params.

func (*JSONValidHandler) Eval ¶

func (h *JSONValidHandler) Eval(
	_ context.Context,
	evalCtx *evals.EvalContext,
	_ map[string]any,
) (result *evals.EvalResult, err error)

Eval checks that the current output is parseable JSON.

func (*JSONValidHandler) Type ¶

func (h *JSONValidHandler) Type() string

Type returns the eval type identifier.

type JudgeOpts ¶

type JudgeOpts struct {
	// Content is the text being evaluated (assistant response or full conversation).
	Content string

	// Criteria describes what the judge should evaluate (e.g. "Is the response helpful?").
	Criteria string

	// Rubric provides detailed scoring guidance (optional).
	Rubric string

	// Model specifies which model to use for judging (optional, provider decides default).
	Model string

	// SystemPrompt overrides the default judge system prompt (optional).
	SystemPrompt string

	// MinScore is the minimum score threshold for passing (optional).
	MinScore *float64

	// Extra holds additional parameters for provider-specific features.
	Extra map[string]any
}

JudgeOpts configures a judge evaluation request.

type JudgeProvider ¶

type JudgeProvider interface {
	// Judge sends the evaluation prompt to an LLM and returns
	// the parsed verdict. Implementations handle provider selection,
	// prompt formatting, and response parsing.
	Judge(ctx context.Context, opts JudgeOpts) (*JudgeResult, error)
}

JudgeProvider abstracts LLM access for judge-based evaluations. Arena, SDK, and eval workers each provide their own implementation that wires in their respective provider infrastructure.

type JudgeResult ¶

type JudgeResult struct {
	// Passed indicates whether the content met the evaluation criteria.
	Passed bool

	// Score is the numerical score assigned by the judge (typically 0.0-1.0).
	Score float64

	// Reasoning explains the judge's evaluation.
	Reasoning string

	// Raw is the unprocessed LLM response text.
	Raw string
}

JudgeResult captures the output of an LLM judge evaluation.

type LLMJudgeHandler ¶

type LLMJudgeHandler struct{}

LLMJudgeHandler evaluates a single assistant turn using an LLM judge. The JudgeProvider must be supplied in evalCtx.Metadata["judge_provider"].

Params:

criteria (string, required): what to evaluate
rubric (string, optional): detailed scoring guidance
model (string, optional): model override for the judge
system_prompt (string, optional): override default system prompt
min_score (float64, optional): minimum score to pass

func (*LLMJudgeHandler) Eval ¶

func (h *LLMJudgeHandler) Eval(
	ctx context.Context,
	evalCtx *evals.EvalContext,
	params map[string]any,
) (result *evals.EvalResult, err error)

Eval runs the LLM judge on the current assistant output.

func (*LLMJudgeHandler) Type ¶

func (h *LLMJudgeHandler) Type() string

Type returns the eval type identifier.

type LLMJudgeSessionHandler ¶

type LLMJudgeSessionHandler struct{}

LLMJudgeSessionHandler evaluates an entire conversation using an LLM judge. It concatenates all assistant messages into a single content string for evaluation.

The JudgeProvider must be supplied in evalCtx.Metadata["judge_provider"].

Params:

criteria (string, required): what to evaluate
rubric (string, optional): detailed scoring guidance
model (string, optional): model override for the judge
system_prompt (string, optional): override default system prompt
min_score (float64, optional): minimum score to pass

func (*LLMJudgeSessionHandler) Eval ¶

func (h *LLMJudgeSessionHandler) Eval(
	ctx context.Context,
	evalCtx *evals.EvalContext,
	params map[string]any,
) (result *evals.EvalResult, err error)

Eval runs the LLM judge on all assistant messages in the session.

func (*LLMJudgeSessionHandler) Type ¶

func (h *LLMJudgeSessionHandler) Type() string

Type returns the eval type identifier.

type LLMJudgeToolCallsHandler ¶ added in v1.3.2

type LLMJudgeToolCallsHandler struct{}

LLMJudgeToolCallsHandler evaluates tool call behavior via an LLM judge. Instead of judging the assistant's text response, it feeds tool call data (names, arguments, results) to the judge for evaluation. Params:

tools ([]string, optional): filter to specific tool names
criteria (string): what to evaluate
rubric (string, optional): detailed scoring guidance
model (string, optional): model override for the judge
min_score (float64, optional): minimum score to pass

func (*LLMJudgeToolCallsHandler) Eval ¶ added in v1.3.2

func (h *LLMJudgeToolCallsHandler) Eval(
	ctx context.Context,
	evalCtx *evals.EvalContext,
	params map[string]any,
) (*evals.EvalResult, error)

Eval runs the LLM judge on formatted tool call data.

func (*LLMJudgeToolCallsHandler) Type ¶ added in v1.3.2

func (h *LLMJudgeToolCallsHandler) Type() string

Type returns the eval type identifier.

type LatencyBudgetHandler ¶

type LatencyBudgetHandler struct{}

LatencyBudgetHandler checks the latency recorded in Metadata["latency_ms"] against a maximum budget in milliseconds. Params: max_ms float64.

func (*LatencyBudgetHandler) Eval ¶

func (h *LatencyBudgetHandler) Eval(
	_ context.Context,
	evalCtx *evals.EvalContext,
	params map[string]any,
) (result *evals.EvalResult, err error)

Eval checks that the latency is within budget.

func (*LatencyBudgetHandler) Type ¶

func (h *LatencyBudgetHandler) Type() string

Type returns the eval type identifier.

type NoToolErrorsHandler ¶ added in v1.3.2

type NoToolErrorsHandler struct{}

NoToolErrorsHandler checks that no tool calls returned errors. Params: tools []string (optional) — if set, only checks calls matching those tool names.

func (*NoToolErrorsHandler) Eval ¶ added in v1.3.2

func (h *NoToolErrorsHandler) Eval(
	_ context.Context,
	evalCtx *evals.EvalContext,
	params map[string]any,
) (*evals.EvalResult, error)

Eval checks for tool errors in the eval context's tool calls.

func (*NoToolErrorsHandler) Type ¶ added in v1.3.2

func (h *NoToolErrorsHandler) Type() string

Type returns the eval type identifier.

type RegexHandler ¶

type RegexHandler struct{}

RegexHandler checks if CurrentOutput matches a regex pattern. Params: pattern string.

func (*RegexHandler) Eval ¶

func (h *RegexHandler) Eval(
	_ context.Context,
	evalCtx *evals.EvalContext,
	params map[string]any,
) (result *evals.EvalResult, err error)

Eval checks that the current output matches the regex pattern.

func (*RegexHandler) Type ¶

func (h *RegexHandler) Type() string

Type returns the eval type identifier.

type RestEvalHandler ¶ added in v1.3.3

type RestEvalHandler struct{}

RestEvalHandler evaluates a single assistant turn by sending conversation context to an external HTTP endpoint (POST by default) and interpreting the structured JSON response.

Params:

url (string, required): endpoint URL
method (string, optional): HTTP method, default POST
headers (map[string]string, optional): request headers, supports ${ENV_VAR}
timeout (string, optional): request timeout, default 30s
include_messages (bool, optional): include conversation history, default true
include_tool_calls (bool, optional): include tool call records, default false
criteria (string, optional): evaluation criteria forwarded in request
min_score (float64, optional): minimum score threshold
extra (map[string]any, optional): arbitrary data forwarded in request

func (*RestEvalHandler) Eval ¶ added in v1.3.3

func (h *RestEvalHandler) Eval(
	ctx context.Context,
	evalCtx *evals.EvalContext,
	params map[string]any,
) (*evals.EvalResult, error)

Eval sends the current assistant output to the configured REST endpoint.

func (*RestEvalHandler) Type ¶ added in v1.3.3

func (h *RestEvalHandler) Type() string

Type returns the eval type identifier.

type RestEvalSessionHandler ¶ added in v1.3.3

type RestEvalSessionHandler struct{}

RestEvalSessionHandler evaluates an entire conversation by sending all assistant messages to an external HTTP endpoint (POST by default).

Params: same as RestEvalHandler.

func (*RestEvalSessionHandler) Eval ¶ added in v1.3.3

func (h *RestEvalSessionHandler) Eval(
	ctx context.Context,
	evalCtx *evals.EvalContext,
	params map[string]any,
) (*evals.EvalResult, error)

Eval sends all assistant messages to the configured REST endpoint.

func (*RestEvalSessionHandler) Type ¶ added in v1.3.3

func (h *RestEvalSessionHandler) Type() string

Type returns the eval type identifier.

type SkillActivatedHandler ¶ added in v1.3.2

type SkillActivatedHandler struct{}

SkillActivatedHandler checks that specific skills were activated. It scans evalCtx.ToolCalls for "skill__activate" calls and extracts the "name" argument from each. Params: skill_names []string, min_calls int (optional, default 1).

func (*SkillActivatedHandler) Eval ¶ added in v1.3.2

func (h *SkillActivatedHandler) Eval(
	_ context.Context,
	evalCtx *evals.EvalContext,
	params map[string]any,
) (*evals.EvalResult, error)

Eval checks that required skills were activated at least the minimum number of times.

func (*SkillActivatedHandler) Type ¶ added in v1.3.2

func (h *SkillActivatedHandler) Type() string

Type returns the eval type identifier.

type SkillNotActivatedHandler ¶ added in v1.3.2

type SkillNotActivatedHandler struct{}

SkillNotActivatedHandler checks that specific skills were NOT activated. Params: skill_names []string.

func (*SkillNotActivatedHandler) Eval ¶ added in v1.3.2

func (h *SkillNotActivatedHandler) Eval(
	_ context.Context,
	evalCtx *evals.EvalContext,
	params map[string]any,
) (*evals.EvalResult, error)

Eval ensures forbidden skills were never activated.

func (*SkillNotActivatedHandler) Type ¶ added in v1.3.2

func (h *SkillNotActivatedHandler) Type() string

Type returns the eval type identifier.

type SpecJudgeProvider ¶ added in v1.3.2

type SpecJudgeProvider struct {
	// contains filtered or unexported fields
}

SpecJudgeProvider implements JudgeProvider by creating a provider from a ProviderSpec. This is the standard implementation used by Arena and any caller that has judge targets as ProviderSpecs.

func NewSpecJudgeProvider ¶ added in v1.3.2

func NewSpecJudgeProvider(spec *providers.ProviderSpec) *SpecJudgeProvider

NewSpecJudgeProvider creates a JudgeProvider from a provider spec.

func (*SpecJudgeProvider) Judge ¶ added in v1.3.2

func (sp *SpecJudgeProvider) Judge(ctx context.Context, opts JudgeOpts) (*JudgeResult, error)

Judge creates a provider from the spec, sends the evaluation prompt, and parses the verdict.

type ToolArgsExcludedSessionHandler ¶

type ToolArgsExcludedSessionHandler struct{}

ToolArgsExcludedSessionHandler checks that a tool was NOT called with specific argument values across the session. Params: tool_name string, excluded_args map[string]any.

func (*ToolArgsExcludedSessionHandler) Eval ¶

func (h *ToolArgsExcludedSessionHandler) Eval(
	ctx context.Context,
	evalCtx *evals.EvalContext,
	params map[string]any,
) (_ *evals.EvalResult, _ error)

Eval ensures the tool was never called with excluded args.

func (*ToolArgsExcludedSessionHandler) Type ¶

func (h *ToolArgsExcludedSessionHandler) Type() string

Type returns the eval type identifier.

type ToolArgsHandler ¶

type ToolArgsHandler struct{}

ToolArgsHandler checks that a tool was called with specific args. Params: tool_name string, expected_args map[string]any.

func (*ToolArgsHandler) Eval ¶

func (h *ToolArgsHandler) Eval(
	_ context.Context,
	evalCtx *evals.EvalContext,
	params map[string]any,
) (result *evals.EvalResult, err error)

Eval checks that the specified tool was called with matching args.

func (*ToolArgsHandler) Type ¶

func (h *ToolArgsHandler) Type() string

Type returns the eval type identifier.

type ToolArgsSessionHandler ¶

type ToolArgsSessionHandler struct{}

ToolArgsSessionHandler checks that a tool was called with specific arguments across the session. Params: tool_name string, expected_args map[string]any.

func (*ToolArgsSessionHandler) Eval ¶

func (h *ToolArgsSessionHandler) Eval(
	ctx context.Context,
	evalCtx *evals.EvalContext,
	params map[string]any,
) (_ *evals.EvalResult, _ error)

Eval checks tool calls for expected arguments.

func (*ToolArgsSessionHandler) Type ¶

func (h *ToolArgsSessionHandler) Type() string

Type returns the eval type identifier.

type ToolCallChainHandler ¶ added in v1.3.2

type ToolCallChainHandler struct{}

ToolCallChainHandler checks a dependency chain of tool calls with per-step constraints. Params: steps []map — each with tool, result_includes, result_matches, args_match, no_error.

func (*ToolCallChainHandler) Eval ¶ added in v1.3.2

func (h *ToolCallChainHandler) Eval(
	_ context.Context,
	evalCtx *evals.EvalContext,
	params map[string]any,
) (*evals.EvalResult, error)

Eval checks that the chain of tool calls satisfies all step constraints in order.

func (*ToolCallChainHandler) Type ¶ added in v1.3.2

func (h *ToolCallChainHandler) Type() string

Type returns the eval type identifier.

type ToolCallCountHandler ¶ added in v1.3.2

type ToolCallCountHandler struct{}

ToolCallCountHandler checks that the number of tool calls falls within bounds. Params: tool string (optional), min int (optional), max int (optional).

func (*ToolCallCountHandler) Eval ¶ added in v1.3.2

func (h *ToolCallCountHandler) Eval(
	_ context.Context,
	evalCtx *evals.EvalContext,
	params map[string]any,
) (*evals.EvalResult, error)

Eval counts matching tool calls and checks min/max bounds.

func (*ToolCallCountHandler) Type ¶ added in v1.3.2

func (h *ToolCallCountHandler) Type() string

Type returns the eval type identifier.

type ToolCallSequenceHandler ¶ added in v1.3.2

type ToolCallSequenceHandler struct{}

ToolCallSequenceHandler checks that tool calls appear in a specified subsequence order. Params: sequence []string — the expected tool names in order.

func (*ToolCallSequenceHandler) Eval ¶ added in v1.3.2

func (h *ToolCallSequenceHandler) Eval(
	_ context.Context,
	evalCtx *evals.EvalContext,
	params map[string]any,
) (*evals.EvalResult, error)

Eval checks subsequence ordering of tool calls.

func (*ToolCallSequenceHandler) Type ¶ added in v1.3.2

func (h *ToolCallSequenceHandler) Type() string

Type returns the eval type identifier.

type ToolCallsWithArgsHandler ¶ added in v1.3.2

type ToolCallsWithArgsHandler struct{}

ToolCallsWithArgsHandler checks that a tool was called with expected arguments. Supports exact value matching (expected_args), regex pattern matching (args_match), and result-level constraints (result_includes, result_matches, no_error).

func (*ToolCallsWithArgsHandler) Eval ¶ added in v1.3.2

func (h *ToolCallsWithArgsHandler) Eval(
	_ context.Context,
	evalCtx *evals.EvalContext,
	params map[string]any,
) (*evals.EvalResult, error)

Eval checks tool calls for argument and result constraints.

func (*ToolCallsWithArgsHandler) Type ¶ added in v1.3.2

func (h *ToolCallsWithArgsHandler) Type() string

Type returns the eval type identifier.

type ToolResultIncludesHandler ¶ added in v1.3.2

type ToolResultIncludesHandler struct{}

ToolResultIncludesHandler checks that tool results contain expected substrings. Params: tool string, patterns []string, occurrence int (optional, default 1).

func (*ToolResultIncludesHandler) Eval ¶ added in v1.3.2

func (h *ToolResultIncludesHandler) Eval(
	_ context.Context,
	evalCtx *evals.EvalContext,
	params map[string]any,
) (*evals.EvalResult, error)

Eval checks substring patterns in tool results.

func (*ToolResultIncludesHandler) Type ¶ added in v1.3.2

func (h *ToolResultIncludesHandler) Type() string

Type returns the eval type identifier.

type ToolResultMatchesHandler ¶ added in v1.3.2

type ToolResultMatchesHandler struct{}

ToolResultMatchesHandler checks that tool results match a regex pattern. Params: tool string, pattern string, occurrence int (optional, default 1).

func (*ToolResultMatchesHandler) Eval ¶ added in v1.3.2

func (h *ToolResultMatchesHandler) Eval(
	_ context.Context,
	evalCtx *evals.EvalContext,
	params map[string]any,
) (*evals.EvalResult, error)

Eval checks a regex pattern on tool results.

func (*ToolResultMatchesHandler) Type ¶ added in v1.3.2

func (h *ToolResultMatchesHandler) Type() string

Type returns the eval type identifier.

type ToolsCalledHandler ¶

type ToolsCalledHandler struct{}

ToolsCalledHandler checks if specific tools were called. Params: tool_names []string, min_calls int (optional).

func (*ToolsCalledHandler) Eval ¶

func (h *ToolsCalledHandler) Eval(
	_ context.Context,
	evalCtx *evals.EvalContext,
	params map[string]any,
) (result *evals.EvalResult, err error)

Eval checks that all expected tools were called.

func (*ToolsCalledHandler) Type ¶

func (h *ToolsCalledHandler) Type() string

Type returns the eval type identifier.

type ToolsCalledSessionHandler ¶

type ToolsCalledSessionHandler struct{}

ToolsCalledSessionHandler checks that specific tools were called across the full session. Params: tool_names []string, min_calls int (optional, default 1).

func (*ToolsCalledSessionHandler) Eval ¶

func (h *ToolsCalledSessionHandler) Eval(
	ctx context.Context,
	evalCtx *evals.EvalContext,
	params map[string]any,
) (_ *evals.EvalResult, _ error)

Eval checks that all required tools were called at least min_calls times.

func (*ToolsCalledSessionHandler) Type ¶

func (h *ToolsCalledSessionHandler) Type() string

Type returns the eval type identifier.

type ToolsNotCalledHandler ¶

type ToolsNotCalledHandler struct{}

ToolsNotCalledHandler checks that specific tools were NOT called. Params: tool_names []string.

func (*ToolsNotCalledHandler) Eval ¶

func (h *ToolsNotCalledHandler) Eval(
	_ context.Context,
	evalCtx *evals.EvalContext,
	params map[string]any,
) (result *evals.EvalResult, err error)

Eval checks that none of the forbidden tools were called.

func (*ToolsNotCalledHandler) Type ¶

func (h *ToolsNotCalledHandler) Type() string

Type returns the eval type identifier.

type ToolsNotCalledSessionHandler ¶

type ToolsNotCalledSessionHandler struct{}

ToolsNotCalledSessionHandler checks that specific tools were NOT called anywhere in the session. Params: tool_names []string.

func (*ToolsNotCalledSessionHandler) Eval ¶

func (h *ToolsNotCalledSessionHandler) Eval(
	ctx context.Context,
	evalCtx *evals.EvalContext,
	params map[string]any,
) (_ *evals.EvalResult, _ error)

Eval ensures forbidden tools were never called across the session.

func (*ToolsNotCalledSessionHandler) Type ¶

func (h *ToolsNotCalledSessionHandler) Type() string

Type returns the eval type identifier.

type VideoDurationHandler ¶ added in v1.3.2

type VideoDurationHandler struct{}

VideoDurationHandler checks that video duration is within range. Params: min_seconds float64, max_seconds float64.

func (*VideoDurationHandler) Eval ¶ added in v1.3.2

func (h *VideoDurationHandler) Eval(
	_ context.Context,
	evalCtx *evals.EvalContext,
	params map[string]any,
) (*evals.EvalResult, error)

Eval checks video duration constraints.

func (*VideoDurationHandler) Type ¶ added in v1.3.2

func (h *VideoDurationHandler) Type() string

Type returns the eval type identifier.

type VideoResolutionHandler ¶ added in v1.3.2

type VideoResolutionHandler struct{}

VideoResolutionHandler checks that video resolution meets requirements. Params: min_width, max_width, min_height, max_height, presets []string.

func (*VideoResolutionHandler) Eval ¶ added in v1.3.2

func (h *VideoResolutionHandler) Eval(
	_ context.Context,
	evalCtx *evals.EvalContext,
	params map[string]any,
) (*evals.EvalResult, error)

Eval checks video resolution constraints.

func (*VideoResolutionHandler) Type ¶ added in v1.3.2

func (h *VideoResolutionHandler) Type() string

Type returns the eval type identifier.

type WorkflowCompleteHandler ¶ added in v1.3.2

type WorkflowCompleteHandler struct{}

WorkflowCompleteHandler checks that the workflow reached a terminal state. Reads evalCtx.Extras["workflow_complete"] (bool) and evalCtx.Extras["workflow_current_state"] (string).

func (*WorkflowCompleteHandler) Eval ¶ added in v1.3.2

func (h *WorkflowCompleteHandler) Eval(
	_ context.Context,
	evalCtx *evals.EvalContext,
	_ map[string]any,
) (*evals.EvalResult, error)

Eval checks whether the workflow is in a terminal state.

func (*WorkflowCompleteHandler) Type ¶ added in v1.3.2

func (h *WorkflowCompleteHandler) Type() string

Type returns the eval type identifier.

type WorkflowStateIsHandler ¶ added in v1.3.2

type WorkflowStateIsHandler struct{}

WorkflowStateIsHandler checks that the current workflow state matches an expected value. Params: state string (required).

func (*WorkflowStateIsHandler) Eval ¶ added in v1.3.2

func (h *WorkflowStateIsHandler) Eval(
	_ context.Context,
	evalCtx *evals.EvalContext,
	params map[string]any,
) (*evals.EvalResult, error)

Eval checks if the current workflow state matches the expected value.

func (*WorkflowStateIsHandler) Type ¶ added in v1.3.2

func (h *WorkflowStateIsHandler) Type() string

Type returns the eval type identifier.

type WorkflowTransitionedToHandler ¶ added in v1.3.2

type WorkflowTransitionedToHandler struct{}

WorkflowTransitionedToHandler checks that a transition to a specific state occurred. Params: state string (required).

func (*WorkflowTransitionedToHandler) Eval ¶ added in v1.3.2

func (h *WorkflowTransitionedToHandler) Eval(
	_ context.Context,
	evalCtx *evals.EvalContext,
	params map[string]any,
) (*evals.EvalResult, error)

Eval checks if the workflow transitioned to the specified state.

func (*WorkflowTransitionedToHandler) Type ¶ added in v1.3.2

func (h *WorkflowTransitionedToHandler) Type() string

Type returns the eval type identifier.

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL