Documentation
¶
Overview ¶
Package adapters provides pluggable recording format adapters for Arena evaluation. It supports loading saved conversations from various formats (session recordings, arena output files, transcripts) into Arena-friendly structures.
Index ¶
- type ArenaAssertionResult
- type ArenaContentPart
- type ArenaCost
- type ArenaMessage
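- type ArenaOutputAdapter
- func NewArenaOutputAdapter() *ArenaOutputAdapter
- func (a *ArenaOutputAdapter) CanHandle(source string, typeHint string) bool
- func (a *ArenaOutputAdapter) Enumerate(source string) ([]RecordingReference, error)
- func (a *ArenaOutputAdapter) Load(ref RecordingReference) ([]types.Message, *RecordingMetadata, error)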
- type ArenaOutputFile
- type ArenaOutputMetadata
- type ArenaOutputSummary
- type ArenaOutputTurn
- type ArenaResponse
- type ArenaToolResult
- type MediaSource
- type ProviderSpec
- type RecordedContentPart
- type RecordedMediaPart
- type RecordedMsg
- type RecordedToolCall
- type RecordedToolCallFunction
- type RecordingAdapter
- type RecordingEvent
- type RecordingMetadata
- type RecordingMetadataFile
- type RecordingReference
- func EnumerateFiles(source, typeHint string) ([]RecordingReference, error)
- type Registry
- func NewEmptyRegistry() *Registry
- func NewRegistry() *Registry
- func (r *Registry) Enumerate(source, typeHint string) ([]RecordingReference, error)
- func (r *Registry) FindAdapter(path, typeHint string) RecordingAdapter
- func (r *Registry) Load(ref RecordingReference) ([]types.Message, *RecordingMetadata, error)
- func (r *Registry) Register(adapter RecordingAdapter)
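- type SessionRecordingAdapter
- func NewSessionRecordingAdapter() *SessionRecordingAdapter
- func (a *SessionRecordingAdapter) CanHandle(source, typeHint string) bool
- func (a *SessionRecordingAdapter) Enumerate(source string) ([]RecordingReference, error)
- func (a *SessionRecordingAdapter) Load(ref RecordingReference) ([]types.Message, *RecordingMetadata, error)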
- type SessionRecordingFile
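- type TranscriptAdapter
- func NewTranscriptAdapter() *TranscriptAdapter
- func (a *TranscriptAdapter) CanHandle(source, typeHint string) bool
- func (a *TranscriptAdapter) Enumerate(source string) ([]RecordingReference, error)
- func (a *TranscriptAdapter) Load(ref RecordingReference) ([]types.Message, *RecordingMetadata, error)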
- type TranscriptContentPart
- type TranscriptFile
- type TranscriptMediaPart
- type TranscriptMessage
- type TranscriptMetadata
- type TranscriptProviderSpec
- type TranscriptToolCall
- type TranscriptToolCallFunction
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type ArenaAssertionResult ¶
type ArenaAssertionResult struct {
// Type identifies the assertion (JSON key "type"); the set of valid
// values is not defined in this package's visible types.
Type string `json:"type"`
// Passed records whether the assertion passed.
Passed bool `json:"passed"`
// Message is optional accompanying text; it is omitted from JSON when empty.
Message string `json:"message,omitempty"`
}
ArenaAssertionResult represents an assertion result.
type ArenaContentPart ¶
type ArenaContentPart struct {
// Type names the kind of part (JSON key "type").
// NOTE(review): presumably it indicates which of Text or Media is set — confirm against the loader.
Type string `json:"type"`
// Text is optional; a nil pointer is omitted from the JSON encoding.
Text *string `json:"text,omitempty"`
// Media is optional; a nil pointer is omitted from the JSON encoding.
Media *types.MediaContent `json:"media,omitempty"`
}
ArenaContentPart represents a content part in arena output.
type ArenaCost ¶
type ArenaCost struct {
// InputTokens is the input token count (JSON key "input_tokens").
InputTokens int `json:"input_tokens"`
// OutputTokens is the output token count (JSON key "output_tokens").
OutputTokens int `json:"output_tokens"`
// TotalCost is the total cost figure (JSON key "total_cost"); its
// currency/unit is not stated here.
TotalCost float64 `json:"total_cost"`
}
ArenaCost represents cost information.
type ArenaMessage ¶
type ArenaMessage struct {
// Content is the message content as a plain string (JSON key "content").
Content string `json:"content"`
// Parts optionally carries the message as a list of content parts;
// it is omitted from JSON when empty.
Parts []ArenaContentPart `json:"parts,omitempty"`
}
ArenaMessage represents a message in the arena output.
type ArenaOutputAdapter ¶
// The struct declares no fields, so an adapter value carries no state of its own.
type ArenaOutputAdapter struct{}
ArenaOutputAdapter loads Arena output JSON files (from completed scenario runs).
func NewArenaOutputAdapter ¶
func NewArenaOutputAdapter() *ArenaOutputAdapter
NewArenaOutputAdapter creates a new arena output adapter.
func (*ArenaOutputAdapter) CanHandle ¶
func (a *ArenaOutputAdapter) CanHandle(source string, typeHint string) bool
CanHandle returns true for *.arena-output.json files or when the type hint is "arena_output".
func (*ArenaOutputAdapter) Enumerate ¶
func (a *ArenaOutputAdapter) Enumerate(source string) ([]RecordingReference, error)
Enumerate expands a source into individual recording references. For file-based sources, this expands glob patterns to matching files.
func (*ArenaOutputAdapter) Load ¶
func (a *ArenaOutputAdapter) Load(ref RecordingReference) ([]types.Message, *RecordingMetadata, error)
Load reads an arena output file and converts it to Arena messages.
type ArenaOutputFile ¶
type ArenaOutputFile struct {
// ScenarioID is stored under the JSON key "scenario_id".
ScenarioID string `json:"scenario_id"`
// ProviderID is stored under the JSON key "provider_id".
ProviderID string `json:"provider_id"`
// Metadata holds the run's tags and extras (see ArenaOutputMetadata).
Metadata ArenaOutputMetadata `json:"metadata"`
// Turns lists the recorded turns of the run.
Turns []ArenaOutputTurn `json:"turns"`
// Summary holds the aggregate turn counts and cost (see ArenaOutputSummary).
Summary ArenaOutputSummary `json:"summary"`
}
ArenaOutputFile represents the structure of an arena output JSON file.
type ArenaOutputMetadata ¶
type ArenaOutputMetadata struct {
// Tags is an optional list of labels; omitted from JSON when empty.
Tags []string `json:"tags,omitempty"`
// Extras is an optional free-form map of additional values; omitted
// from JSON when empty. Its keys are not defined here.
Extras map[string]interface{} `json:"extras,omitempty"`
}
ArenaOutputMetadata contains metadata about the scenario run.
type ArenaOutputSummary ¶
type ArenaOutputSummary struct {
// TotalTurns is the turn count (JSON key "total_turns").
TotalTurns int `json:"total_turns"`
// PassedTurns is the count of passed turns (JSON key "passed_turns").
PassedTurns int `json:"passed_turns"`
// FailedTurns is the count of failed turns (JSON key "failed_turns").
FailedTurns int `json:"failed_turns"`
// TotalCost is the total cost figure (JSON key "total_cost"); its
// currency/unit is not stated here.
TotalCost float64 `json:"total_cost"`
}
ArenaOutputSummary contains summary statistics.
type ArenaOutputTurn ¶
type ArenaOutputTurn struct {
// TurnIndex is the turn's index (JSON key "turn_index").
// NOTE(review): whether it is 0- or 1-based is not visible here — confirm.
TurnIndex int `json:"turn_index"`
// Timestamp is the turn's timestamp; encoding/json reads and writes
// time.Time values as RFC 3339 strings.
Timestamp time.Time `json:"timestamp"`
// UserMessage is the user message for this turn.
UserMessage ArenaMessage `json:"user_message"`
// Response is the provider response for this turn.
Response ArenaResponse `json:"response"`
// ToolResults optionally lists tool execution results; omitted from JSON when empty.
ToolResults []ArenaToolResult `json:"tool_results,omitempty"`
// Assertions optionally lists assertion results; omitted from JSON when empty.
Assertions []ArenaAssertionResult `json:"assertions,omitempty"`
}
ArenaOutputTurn represents a single turn in the arena output.
type ArenaResponse ¶
type ArenaResponse struct {
// Message is the response message, stored directly as a types.Message.
Message types.Message `json:"message"`
// Cost is optional cost information; a nil pointer is omitted from JSON.
Cost *ArenaCost `json:"cost,omitempty"`
}
ArenaResponse represents a provider response in arena output.
type ArenaToolResult ¶
type ArenaToolResult struct {
// ToolCallID is stored under the JSON key "tool_call_id".
// NOTE(review): presumably the ID of the tool call this result answers — confirm.
ToolCallID string `json:"tool_call_id"`
// Content is the result content as a string.
Content string `json:"content"`
}
ArenaToolResult represents a tool execution result.
type MediaSource ¶
// The fields below mirror, name for name and type for type, those of
// RecordedMediaPart and TranscriptMediaPart. No struct tags are declared,
// so this type is not tied to a particular serialized key layout.
type MediaSource struct {
// MIMEType is the media MIME type string.
MIMEType string
// Data is inline media data.
// NOTE(review): presumably base64-encoded, as in RecordedMediaPart — confirm.
Data string
// URI is a URI for the media.
URI string
// Path is a file path for the media.
Path string
// Size is the media size. NOTE(review): presumably bytes — confirm.
Size int64
// Width is the media width. NOTE(review): presumably pixels — confirm.
Width int
// Height is the media height. NOTE(review): presumably pixels — confirm.
Height int
Duration int64 // milliseconds
}
MediaSource defines the source data for media content conversion.
type ProviderSpec ¶
type ProviderSpec struct {
// Type is the provider type (JSON/YAML key "type").
Type string `json:"type" yaml:"type"`
// Model is the model name (JSON/YAML key "model").
Model string `json:"model" yaml:"model"`
// ID is the provider identifier (JSON/YAML key "id").
ID string `json:"id" yaml:"id"`
}
ProviderSpec describes a provider configuration for judge targets.
type RecordedContentPart ¶
type RecordedContentPart struct {
// Type names the kind of part (JSON key "type").
// NOTE(review): presumably it indicates which of Text or Media is set — confirm against the loader.
Type string `json:"type"`
// Text is optional; a nil pointer is omitted from the JSON encoding.
Text *string `json:"text,omitempty"`
// Media is optional; a nil pointer is omitted from the JSON encoding.
Media *RecordedMediaPart `json:"media,omitempty"`
}
RecordedContentPart represents a content part in the recording.
type RecordedMediaPart ¶
// Every field is tagged omitempty, so zero-valued fields are left out of
// the JSON encoding.
type RecordedMediaPart struct {
// MIMEType is the media MIME type string.
MIMEType string `json:"mime_type,omitempty"`
Data string `json:"data,omitempty"` // Base64 encoded
URI string `json:"uri,omitempty"` // URL
Path string `json:"path,omitempty"` // File path
// Size is the media size. NOTE(review): presumably bytes — confirm.
Size int64 `json:"size,omitempty"`
// Width is the media width. NOTE(review): presumably pixels — confirm.
Width int `json:"width,omitempty"`
// Height is the media height. NOTE(review): presumably pixels — confirm.
Height int `json:"height,omitempty"`
Duration int64 `json:"duration,omitempty"` // milliseconds
}
RecordedMediaPart represents media content in the recording.
type RecordedMsg ¶
type RecordedMsg struct {
// Role is the message role string (JSON key "role"); valid values are
// not defined here.
Role string `json:"role"`
// Content is the message content as a plain string.
Content string `json:"content"`
// Name is an optional name; omitted from JSON when empty.
Name string `json:"name,omitempty"`
// Parts optionally carries the message as content parts; omitted when empty.
Parts []RecordedContentPart `json:"parts,omitempty"`
// ToolCalls optionally lists tool calls attached to this message; omitted when empty.
ToolCalls []RecordedToolCall `json:"tool_calls,omitempty"`
// ToolCallID is an optional tool call identifier; omitted when empty.
// NOTE(review): presumably set on messages that carry a tool's result — confirm.
ToolCallID string `json:"tool_call_id,omitempty"`
}
RecordedMsg represents a message in the recording.
type RecordedToolCall ¶
type RecordedToolCall struct {
// ID is the tool call identifier (JSON key "id").
ID string `json:"id"`
// Type is the tool call type string (JSON key "type"); valid values
// are not defined here.
Type string `json:"type"`
// Function holds the called function's name and arguments.
Function RecordedToolCallFunction `json:"function"`
}
RecordedToolCall represents a tool call in the recording.
type RecordedToolCallFunction ¶
type RecordedToolCallFunction struct {
// Name is the function name.
Name string `json:"name"`
// Arguments holds the call arguments as a single string.
// NOTE(review): presumably a JSON-encoded object — confirm against the loader.
Arguments string `json:"arguments"`
}
RecordedToolCallFunction represents a function call within a tool call.
type RecordingAdapter ¶
type RecordingAdapter interface {
// CanHandle returns true if this adapter supports the given source/type hint.
// The source could be a file path, glob pattern, database query, etc.
// typeHint is an optional explicit format indicator from the eval config.
// A Registry checks adapters in registration order and uses the first
// one whose CanHandle returns true.
CanHandle(source string, typeHint string) bool
// Enumerate expands a source into individual recording references.
// For file-based adapters, this expands glob patterns to matching files.
// For database adapters, this could execute a query and return record IDs.
// Returns a single-element slice for non-expandable sources.
Enumerate(source string) ([]RecordingReference, error)
// Load converts a recording to Arena message format.
// The reference should have been obtained from Enumerate.
// Returns the messages, metadata, and any error encountered.
Load(ref RecordingReference) ([]types.Message, *RecordingMetadata, error)
}
RecordingAdapter converts saved conversations from various formats into Arena-friendly structures for evaluation.
type RecordingEvent ¶
type RecordingEvent struct {
// Type is the event type string (JSON key "type"); valid values are
// not defined here.
Type string `json:"type"`
// Timestamp is the event's timestamp; encoding/json reads and writes
// time.Time values as RFC 3339 strings.
Timestamp time.Time `json:"timestamp"`
// Message is the recorded message carried by this event.
// NOTE(review): omitempty has no effect on a struct-typed field in
// encoding/json, so a zero RecordedMsg is still written as an object.
// Omitting it would need a *RecordedMsg (or the omitzero option on
// Go 1.24+), both of which change the encoded output.
Message RecordedMsg `json:"message,omitempty"`
// Event is an optional untyped payload; a nil value is omitted from
// JSON. Its shape is not defined in this package's visible types.
Event interface{} `json:"event,omitempty"`
}
RecordingEvent represents a single event in the recording.
type RecordingMetadata ¶
type RecordingMetadata struct {
// JudgeTargets maps judge names to provider specifications.
// Used by LLM judge assertions to determine which provider to use.
JudgeTargets map[string]ProviderSpec `json:"judge_targets,omitempty" yaml:"judge_targets,omitempty"`
// ProviderInfo contains information about the original provider(s)
// that generated the recorded conversation.
ProviderInfo map[string]interface{} `json:"provider_info,omitempty" yaml:"provider_info,omitempty"`
// Tags are optional labels for categorizing/filtering recordings.
Tags []string `json:"tags,omitempty" yaml:"tags,omitempty"`
// Timestamps contains the timestamp for each turn in the conversation.
// The length should match the number of messages.
// NOTE(review): "turn" here appears to mean one message, given the
// length requirement above — confirm against the adapters.
Timestamps []time.Time `json:"timestamps,omitempty" yaml:"timestamps,omitempty"`
// SessionID is the unique identifier for the recorded session.
SessionID string `json:"session_id,omitempty" yaml:"session_id,omitempty"`
// Duration is the total duration of the conversation.
// encoding/json writes a time.Duration as an integer number of
// nanoseconds; the YAML form depends on the YAML library in use.
Duration time.Duration `json:"duration,omitempty" yaml:"duration,omitempty"`
// Extras holds any additional metadata from the recording.
Extras map[string]interface{} `json:"extras,omitempty" yaml:"extras,omitempty"`
}
RecordingMetadata contains metadata extracted from the recording that should flow through to the evaluation context.
type RecordingMetadataFile ¶
type RecordingMetadataFile struct {
// SessionID is the session identifier (JSON key "session_id"); it is
// the only field here that is always written.
SessionID string `json:"session_id"`
// ProviderID is an optional provider identifier; omitted from JSON when empty.
ProviderID string `json:"provider_id,omitempty"`
// Model is an optional model name; omitted from JSON when empty.
Model string `json:"model,omitempty"`
// Tags is an optional list of labels; omitted from JSON when empty.
Tags []string `json:"tags,omitempty"`
}
RecordingMetadataFile contains metadata about the recording session.
type RecordingReference ¶
type RecordingReference struct {
// ID is a unique identifier for this recording reference.
// For file-based adapters, this is typically the file path.
// For database adapters, this could be a record ID.
ID string `json:"id" yaml:"id"`
// Source is the original source pattern/query that produced this reference.
// For file-based adapters, this is the original glob pattern or path.
Source string `json:"source" yaml:"source"`
// TypeHint is an optional format indicator (e.g., "session", "arena_output", "transcript").
// It is omitted from JSON and YAML when empty.
TypeHint string `json:"type_hint,omitempty" yaml:"type_hint,omitempty"`
// Metadata contains adapter-specific metadata about this reference.
// This can include file size, modification time, database metadata, etc.
// It is omitted from JSON and YAML when empty.
Metadata map[string]interface{} `json:"metadata,omitempty" yaml:"metadata,omitempty"`
}
RecordingReference is an opaque reference to a single recording. It abstracts the underlying storage mechanism (file, database, API, etc.) allowing adapters to enumerate and load recordings from various sources.
func EnumerateFiles ¶
func EnumerateFiles(source, typeHint string) ([]RecordingReference, error)
EnumerateFiles is a helper for file-based adapters to expand glob patterns. It returns recording references for each matching file. If the source doesn't contain glob characters, it returns a single reference.
type Registry ¶
type Registry struct {
// contains filtered or unexported fields
// No exported fields are shown; a Registry is used through its
// methods (Register, FindAdapter, Enumerate, Load).
}
Registry manages registered recording adapters.
func NewEmptyRegistry ¶
func NewEmptyRegistry() *Registry
NewEmptyRegistry creates an empty adapter registry without any built-in adapters. This is useful for testing or when you want to register only specific adapters.
func NewRegistry ¶
func NewRegistry() *Registry
NewRegistry creates a new adapter registry with default adapters registered.
func (*Registry) Enumerate ¶
func (r *Registry) Enumerate(source, typeHint string) ([]RecordingReference, error)
Enumerate expands a source into individual recording references. Uses the first adapter that can handle the source. Returns an error if no adapter can handle the source or if enumeration fails.
func (*Registry) FindAdapter ¶
func (r *Registry) FindAdapter(path, typeHint string) RecordingAdapter
FindAdapter returns the first adapter that can handle the given path and type hint. Returns nil if no adapter can handle the format.
func (*Registry) Load ¶
func (r *Registry) Load(ref RecordingReference) ([]types.Message, *RecordingMetadata, error)
Load finds an appropriate adapter and loads the recording from a reference. Returns an error if no adapter can handle the format or if loading fails.
func (*Registry) Register ¶
func (r *Registry) Register(adapter RecordingAdapter)
Register adds an adapter to the registry. Adapters are checked in registration order, so register more specific adapters before generic ones.
type SessionRecordingAdapter ¶
// The struct declares no fields, so an adapter value carries no state of its own.
type SessionRecordingAdapter struct{}
SessionRecordingAdapter loads PromptKit session recordings (*.recording.json).
func NewSessionRecordingAdapter ¶
func NewSessionRecordingAdapter() *SessionRecordingAdapter
NewSessionRecordingAdapter creates a new session recording adapter.
func (*SessionRecordingAdapter) CanHandle ¶
func (a *SessionRecordingAdapter) CanHandle(source, typeHint string) bool
CanHandle returns true for *.recording.json files or when the type hint is "session".
func (*SessionRecordingAdapter) Enumerate ¶
func (a *SessionRecordingAdapter) Enumerate(source string) ([]RecordingReference, error)
Enumerate expands a source into individual recording references. For file-based sources, this expands glob patterns to matching files.
func (*SessionRecordingAdapter) Load ¶
func (a *SessionRecordingAdapter) Load(ref RecordingReference) ([]types.Message, *RecordingMetadata, error)
Load reads a session recording file and converts it to Arena messages.
type SessionRecordingFile ¶
type SessionRecordingFile struct {
// Metadata holds the session-level metadata (see RecordingMetadataFile).
Metadata RecordingMetadataFile `json:"metadata"`
// Events lists the recorded events.
// NOTE(review): presumably in recording order — confirm.
Events []RecordingEvent `json:"events"`
}
SessionRecordingFile represents the structure of a *.recording.json file.
type TranscriptAdapter ¶
// The struct declares no fields, so an adapter value carries no state of its own.
type TranscriptAdapter struct{}
TranscriptAdapter loads transcript YAML files (*.transcript.yaml).
func NewTranscriptAdapter ¶
func NewTranscriptAdapter() *TranscriptAdapter
NewTranscriptAdapter creates a new transcript adapter.
func (*TranscriptAdapter) CanHandle ¶
func (a *TranscriptAdapter) CanHandle(source, typeHint string) bool
CanHandle returns true for *.transcript.yaml files or when the type hint is "transcript".
func (*TranscriptAdapter) Enumerate ¶
func (a *TranscriptAdapter) Enumerate(source string) ([]RecordingReference, error)
Enumerate expands a source into individual recording references. For file-based sources, this expands glob patterns to matching files.
func (*TranscriptAdapter) Load ¶
func (a *TranscriptAdapter) Load(ref RecordingReference) ([]types.Message, *RecordingMetadata, error)
Load reads a transcript file and converts it to Arena messages.
type TranscriptContentPart ¶
type TranscriptContentPart struct {
// Type names the kind of part (YAML key "type").
// NOTE(review): presumably it indicates which of Text or Media is set — confirm against the loader.
Type string `yaml:"type"`
// Text is optional (pointer, tagged omitempty).
Text *string `yaml:"text,omitempty"`
// Media is optional (pointer, tagged omitempty).
Media *TranscriptMediaPart `yaml:"media,omitempty"`
}
TranscriptContentPart represents a content part in the transcript.
type TranscriptFile ¶
type TranscriptFile struct {
// Metadata holds the transcript-level metadata (see TranscriptMetadata).
Metadata TranscriptMetadata `yaml:"metadata"`
// Messages lists the transcript's messages.
// NOTE(review): presumably in conversation order — confirm.
Messages []TranscriptMessage `yaml:"messages"`
}
TranscriptFile represents the structure of a *.transcript.yaml file.
type TranscriptMediaPart ¶
// Every field is tagged omitempty. The field set matches
// RecordedMediaPart, with yaml tags in place of json tags.
type TranscriptMediaPart struct {
// MIMEType is the media MIME type string.
MIMEType string `yaml:"mime_type,omitempty"`
Data string `yaml:"data,omitempty"` // Base64 encoded
URI string `yaml:"uri,omitempty"` // URL
Path string `yaml:"path,omitempty"` // File path
// Size is the media size. NOTE(review): presumably bytes — confirm.
Size int64 `yaml:"size,omitempty"`
// Width is the media width. NOTE(review): presumably pixels — confirm.
Width int `yaml:"width,omitempty"`
// Height is the media height. NOTE(review): presumably pixels — confirm.
Height int `yaml:"height,omitempty"`
Duration int64 `yaml:"duration,omitempty"` // milliseconds
}
TranscriptMediaPart represents media content in the transcript.
type TranscriptMessage ¶
type TranscriptMessage struct {
// Role is the message role string (YAML key "role"); valid values are
// not defined here.
Role string `yaml:"role"`
// Content is the message content as a plain string.
Content string `yaml:"content"`
// Name is an optional name.
Name string `yaml:"name,omitempty"`
// Timestamp is optional and kept as a raw string, unlike the
// time.Time timestamps used by the JSON formats in this package.
// NOTE(review): the expected layout is not visible here — confirm what Load parses.
Timestamp string `yaml:"timestamp,omitempty"`
// Parts optionally carries the message as content parts.
Parts []TranscriptContentPart `yaml:"parts,omitempty"`
// ToolCalls optionally lists tool calls attached to this message.
ToolCalls []TranscriptToolCall `yaml:"tool_calls,omitempty"`
// ToolCallID is an optional tool call identifier.
// NOTE(review): presumably set on messages that carry a tool's result — confirm.
ToolCallID string `yaml:"tool_call_id,omitempty"`
}
TranscriptMessage represents a message in the transcript.
type TranscriptMetadata ¶
// All fields are optional (tagged omitempty).
type TranscriptMetadata struct {
// SessionID is the session identifier.
SessionID string `yaml:"session_id,omitempty"`
// Provider is the provider string (YAML key "provider").
Provider string `yaml:"provider,omitempty"`
// Model is the model name.
Model string `yaml:"model,omitempty"`
// Tags is a list of labels.
Tags []string `yaml:"tags,omitempty"`
// JudgeTargets maps string keys to provider specs.
// NOTE(review): presumably keyed by judge name, like RecordingMetadata.JudgeTargets — confirm.
JudgeTargets map[string]TranscriptProviderSpec `yaml:"judge_targets,omitempty"`
}
TranscriptMetadata contains metadata about the transcript.
type TranscriptProviderSpec ¶
// The fields match ProviderSpec by name and type, with yaml tags only.
type TranscriptProviderSpec struct {
// Type is the provider type (YAML key "type").
Type string `yaml:"type"`
// Model is the model name (YAML key "model").
Model string `yaml:"model"`
// ID is the provider identifier (YAML key "id").
ID string `yaml:"id"`
}
TranscriptProviderSpec describes a provider for judge targets in transcripts.
type TranscriptToolCall ¶
type TranscriptToolCall struct {
// ID is the tool call identifier (YAML key "id").
ID string `yaml:"id"`
// Type is the tool call type string (YAML key "type"); valid values
// are not defined here.
Type string `yaml:"type"`
// Function holds the called function's name and arguments.
Function TranscriptToolCallFunction `yaml:"function"`
}
TranscriptToolCall represents a tool call in the transcript.
type TranscriptToolCallFunction ¶
type TranscriptToolCallFunction struct {
// Name is the function name.
Name string `yaml:"name"`
// Arguments holds the call arguments as a single string.
// NOTE(review): presumably a JSON-encoded object — confirm against the loader.
Arguments string `yaml:"arguments"`
}
TranscriptToolCallFunction represents a function call within a tool call.