adapters

package
v1.2.0 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 15, 2026 License: Apache-2.0 Imports: 9 Imported by: 0

Documentation

Overview

Package adapters provides pluggable recording format adapters for Arena evaluation. It supports loading saved conversations from various formats (session recordings, arena output files, transcripts) into Arena-friendly structures.

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type ArenaAssertionResult

// ArenaAssertionResult represents an assertion result.
type ArenaAssertionResult struct {
	// Type is written to JSON under the key "type".
	// NOTE(review): presumably names the kind of assertion that ran — confirm.
	Type    string `json:"type"`
	// Passed is written to JSON under the key "passed"; false is still emitted
	// because the tag has no omitempty.
	Passed  bool   `json:"passed"`
	// Message is optional: an empty string is dropped from the JSON output.
	Message string `json:"message,omitempty"`
}

ArenaAssertionResult represents an assertion result.

type ArenaContentPart

// ArenaContentPart represents a content part in arena output.
type ArenaContentPart struct {
	// Type is written to JSON under the key "type".
	// NOTE(review): presumably selects which of Text/Media is populated; the
	// set of valid values is not visible here — confirm.
	Type  string              `json:"type"`
	// Text is a pointer so that an absent value (nil) is omitted from JSON.
	Text  *string             `json:"text,omitempty"`
	// Media is optional; a nil pointer is omitted from JSON.
	Media *types.MediaContent `json:"media,omitempty"`
}

ArenaContentPart represents a content part in arena output.

type ArenaCost

// ArenaCost represents cost information.
type ArenaCost struct {
	// InputTokens is the input token count (JSON key "input_tokens").
	InputTokens  int     `json:"input_tokens"`
	// OutputTokens is the output token count (JSON key "output_tokens").
	OutputTokens int     `json:"output_tokens"`
	// TotalCost is the total cost (JSON key "total_cost").
	// NOTE(review): the currency/unit is not stated here — confirm.
	TotalCost    float64 `json:"total_cost"`
}

ArenaCost represents cost information.

type ArenaMessage

// ArenaMessage represents a message in the arena output.
type ArenaMessage struct {
	// Content is the message text (JSON key "content"); always emitted.
	Content string             `json:"content"`
	// Parts is optional; an empty slice is omitted from JSON.
	// NOTE(review): how Parts relates to Content when both are set is not
	// visible here — confirm.
	Parts   []ArenaContentPart `json:"parts,omitempty"`
}

ArenaMessage represents a message in the arena output.

type ArenaOutputAdapter

// ArenaOutputAdapter loads Arena output JSON files (from completed scenario runs).
// The type has no fields, so values carry no per-instance state.
type ArenaOutputAdapter struct{}

ArenaOutputAdapter loads Arena output JSON files (from completed scenario runs).

func NewArenaOutputAdapter

func NewArenaOutputAdapter() *ArenaOutputAdapter

NewArenaOutputAdapter creates a new arena output adapter.

func (*ArenaOutputAdapter) CanHandle

func (a *ArenaOutputAdapter) CanHandle(source string, typeHint string) bool

CanHandle returns true for *.arena-output.json files or "arena_output" type hint.

func (*ArenaOutputAdapter) Enumerate

func (a *ArenaOutputAdapter) Enumerate(source string) ([]RecordingReference, error)

Enumerate expands a source into individual recording references. For file-based sources, this expands glob patterns to matching files.

func (*ArenaOutputAdapter) Load

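func (a *ArenaOutputAdapter) Load(ref RecordingReference) ([]types.Message, *RecordingMetadata, error)

Load reads an arena output file and converts it to Arena messages.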

type ArenaOutputFile

// ArenaOutputFile represents the structure of an arena output JSON file.
type ArenaOutputFile struct {
	// ScenarioID is stored under the JSON key "scenario_id".
	ScenarioID string              `json:"scenario_id"`
	// ProviderID is stored under the JSON key "provider_id".
	ProviderID string              `json:"provider_id"`
	// Metadata holds the run's tags and extras.
	Metadata   ArenaOutputMetadata `json:"metadata"`
	// Turns lists the individual turns of the run.
	Turns      []ArenaOutputTurn   `json:"turns"`
	// Summary holds the summary statistics for the run.
	Summary    ArenaOutputSummary  `json:"summary"`
}

ArenaOutputFile represents the structure of an arena output JSON file.

type ArenaOutputMetadata

// ArenaOutputMetadata contains metadata about the scenario run.
type ArenaOutputMetadata struct {
	// Tags is optional; an empty slice is omitted from JSON.
	Tags   []string               `json:"tags,omitempty"`
	// Extras is an untyped key/value bag; an empty map is omitted from JSON.
	Extras map[string]interface{} `json:"extras,omitempty"`
}

ArenaOutputMetadata contains metadata about the scenario run.

type ArenaOutputSummary

// ArenaOutputSummary contains summary statistics.
type ArenaOutputSummary struct {
	// TotalTurns is stored under the JSON key "total_turns".
	TotalTurns  int     `json:"total_turns"`
	// PassedTurns is stored under the JSON key "passed_turns".
	PassedTurns int     `json:"passed_turns"`
	// FailedTurns is stored under the JSON key "failed_turns".
	// NOTE(review): presumably PassedTurns + FailedTurns == TotalTurns, but
	// nothing here enforces it — confirm.
	FailedTurns int     `json:"failed_turns"`
	// TotalCost is stored under the JSON key "total_cost".
	// NOTE(review): the currency/unit is not stated here — confirm.
	TotalCost   float64 `json:"total_cost"`
}

ArenaOutputSummary contains summary statistics.

type ArenaOutputTurn

// ArenaOutputTurn represents a single turn in the arena output.
type ArenaOutputTurn struct {
	// TurnIndex is the turn's index (JSON key "turn_index").
	// NOTE(review): whether indexing is 0- or 1-based is not visible here — confirm.
	TurnIndex   int                    `json:"turn_index"`
	// Timestamp is the time recorded for the turn (JSON key "timestamp").
	Timestamp   time.Time              `json:"timestamp"`
	// UserMessage is the user-side message of the turn.
	UserMessage ArenaMessage           `json:"user_message"`
	// Response is the provider response for the turn.
	Response    ArenaResponse          `json:"response"`
	// ToolResults is optional; an empty slice is omitted from JSON.
	ToolResults []ArenaToolResult      `json:"tool_results,omitempty"`
	// Assertions is optional; an empty slice is omitted from JSON.
	Assertions  []ArenaAssertionResult `json:"assertions,omitempty"`
}

ArenaOutputTurn represents a single turn in the arena output.

type ArenaResponse

// ArenaResponse represents a provider response in arena output.
type ArenaResponse struct {
	// Message is the response message, using the shared types.Message type
	// rather than ArenaMessage.
	Message types.Message `json:"message"`
	// Cost is optional; a nil pointer is omitted from JSON.
	Cost    *ArenaCost    `json:"cost,omitempty"`
}

ArenaResponse represents a provider response in arena output.

type ArenaToolResult

// ArenaToolResult represents a tool execution result.
type ArenaToolResult struct {
	// ToolCallID is stored under the JSON key "tool_call_id".
	// NOTE(review): presumably matches the ID of the tool call that produced
	// this result — confirm.
	ToolCallID string `json:"tool_call_id"`
	// Content is the result payload as a string (JSON key "content").
	Content    string `json:"content"`
}

ArenaToolResult represents a tool execution result.

type MediaSource

// MediaSource defines the source data for media content conversion.
// Its fields mirror RecordedMediaPart and TranscriptMediaPart, but carry no
// serialization tags.
type MediaSource struct {
	// MIMEType is the media MIME type.
	MIMEType string
	// NOTE(review): presumably base64-encoded inline data, as commented on
	// RecordedMediaPart.Data — confirm.
	Data     string
	// NOTE(review): presumably a URL, as commented on RecordedMediaPart.URI — confirm.
	URI      string
	// NOTE(review): presumably a file path, as commented on
	// RecordedMediaPart.Path — confirm.
	Path     string
	// NOTE(review): unit is presumably bytes — confirm.
	Size     int64
	// NOTE(review): Width and Height are presumably in pixels — confirm.
	Width    int
	Height   int
	Duration int64 // milliseconds
}

MediaSource defines the source data for media content conversion.

type ProviderSpec

// ProviderSpec describes a provider configuration for judge targets.
// Each field uses the same key name for both JSON and YAML.
type ProviderSpec struct {
	// Type is stored under the key "type".
	Type  string `json:"type" yaml:"type"`
	// Model is stored under the key "model".
	Model string `json:"model" yaml:"model"`
	// ID is stored under the key "id".
	ID    string `json:"id" yaml:"id"`
}

ProviderSpec describes a provider configuration for judge targets.

type RecordedContentPart

// RecordedContentPart represents a content part in the recording.
type RecordedContentPart struct {
	// Type is written to JSON under the key "type".
	// NOTE(review): presumably selects which of Text/Media is populated — confirm.
	Type  string             `json:"type"`
	// Text is a pointer so that an absent value (nil) is omitted from JSON.
	Text  *string            `json:"text,omitempty"`
	// Media is optional; a nil pointer is omitted from JSON.
	Media *RecordedMediaPart `json:"media,omitempty"`
}

RecordedContentPart represents a content part in the recording.

type RecordedMediaPart

// RecordedMediaPart represents media content in the recording.
// Every field is tagged omitempty, so zero values are left out of the JSON.
// NOTE(review): Data, URI and Path look like alternative ways to locate the
// media; which one takes precedence is not visible here — confirm.
type RecordedMediaPart struct {
	MIMEType string `json:"mime_type,omitempty"`
	Data     string `json:"data,omitempty"` // Base64 encoded
	URI      string `json:"uri,omitempty"`  // URL
	Path     string `json:"path,omitempty"` // File path
	// NOTE(review): unit is presumably bytes — confirm.
	Size     int64  `json:"size,omitempty"`
	// NOTE(review): Width and Height are presumably in pixels — confirm.
	Width    int    `json:"width,omitempty"`
	Height   int    `json:"height,omitempty"`
	Duration int64  `json:"duration,omitempty"` // milliseconds
}

RecordedMediaPart represents media content in the recording.

type RecordedMsg

// RecordedMsg represents a message in the recording.
type RecordedMsg struct {
	// Role is stored under the JSON key "role".
	// NOTE(review): the set of valid role values is not visible here — confirm.
	Role       string                `json:"role"`
	// Content is the message text (JSON key "content"); always emitted.
	Content    string                `json:"content"`
	// Name is optional; an empty string is omitted from JSON.
	Name       string                `json:"name,omitempty"`
	// Parts is optional; an empty slice is omitted from JSON.
	Parts      []RecordedContentPart `json:"parts,omitempty"`
	// ToolCalls is optional; an empty slice is omitted from JSON.
	ToolCalls  []RecordedToolCall    `json:"tool_calls,omitempty"`
	// ToolCallID is optional; an empty string is omitted from JSON.
	// NOTE(review): presumably set on tool-result messages to reference the
	// originating RecordedToolCall.ID — confirm.
	ToolCallID string                `json:"tool_call_id,omitempty"`
}

RecordedMsg represents a message in the recording.

type RecordedToolCall

// RecordedToolCall represents a tool call in the recording.
type RecordedToolCall struct {
	// ID is stored under the JSON key "id".
	ID       string                   `json:"id"`
	// Type is stored under the JSON key "type".
	// NOTE(review): the set of valid values is not visible here — confirm.
	Type     string                   `json:"type"`
	// Function holds the called function's name and arguments.
	Function RecordedToolCallFunction `json:"function"`
}

RecordedToolCall represents a tool call in the recording.

type RecordedToolCallFunction

// RecordedToolCallFunction represents a function call within a tool call.
type RecordedToolCallFunction struct {
	// Name is stored under the JSON key "name".
	Name      string `json:"name"`
	// Arguments is carried as a plain string (JSON key "arguments").
	// NOTE(review): presumably a JSON-encoded argument object — confirm.
	Arguments string `json:"arguments"`
}

RecordedToolCallFunction represents a function call within a tool call.

type RecordingAdapter

// RecordingAdapter converts saved conversations from various formats into
// Arena-friendly structures for evaluation.
//
// The intended call order, per the method comments below, is CanHandle to
// select an adapter, Enumerate to expand the source, then Load per reference.
type RecordingAdapter interface {
	// CanHandle returns true if this adapter supports the given source/type hint.
	// The source could be a file path, glob pattern, database query, etc.
	// typeHint is an optional explicit format indicator from the eval config.
	CanHandle(source string, typeHint string) bool

	// Enumerate expands a source into individual recording references.
	// For file-based adapters, this expands glob patterns to matching files.
	// For database adapters, this could execute a query and return record IDs.
	// Returns a single-element slice for non-expandable sources.
	Enumerate(source string) ([]RecordingReference, error)

	// Load converts a recording to Arena message format.
	// The reference should have been obtained from Enumerate.
	// Returns the messages, metadata, and any error encountered.
	Load(ref RecordingReference) ([]types.Message, *RecordingMetadata, error)
}

RecordingAdapter converts saved conversations from various formats into Arena-friendly structures for evaluation.

type RecordingEvent

// RecordingEvent represents a single event in the recording.
type RecordingEvent struct {
	// Type is stored under the JSON key "type".
	// NOTE(review): the set of valid event types is not visible here — confirm.
	Type      string      `json:"type"`
	// Timestamp is the time recorded for the event (JSON key "timestamp").
	Timestamp time.Time   `json:"timestamp"`
	// Message carries the recorded message for this event.
	// NOTE(review): omitempty has no effect on a struct-typed field in
	// encoding/json, so "message" is always emitted when marshaling; a pointer
	// field (or omitzero on Go 1.24+) would be needed to actually omit it.
	Message   RecordedMsg `json:"message,omitempty"`
	// Event is an untyped payload; a nil interface value is omitted from JSON.
	Event     interface{} `json:"event,omitempty"`
}

RecordingEvent represents a single event in the recording.

type RecordingMetadata

// RecordingMetadata contains metadata extracted from the recording that should
// flow through to the evaluation context. Every field is optional and is
// omitted from JSON/YAML output when empty.
type RecordingMetadata struct {
	// JudgeTargets maps judge names to provider specifications.
	// Used by LLM judge assertions to determine which provider to use.
	JudgeTargets map[string]ProviderSpec `json:"judge_targets,omitempty" yaml:"judge_targets,omitempty"`

	// ProviderInfo contains information about the original provider(s)
	// that generated the recorded conversation.
	ProviderInfo map[string]interface{} `json:"provider_info,omitempty" yaml:"provider_info,omitempty"`

	// Tags are optional labels for categorizing/filtering recordings.
	Tags []string `json:"tags,omitempty" yaml:"tags,omitempty"`

	// Timestamps contains the timestamp for each turn in the conversation.
	// The length should match the number of messages.
	Timestamps []time.Time `json:"timestamps,omitempty" yaml:"timestamps,omitempty"`

	// SessionID is the unique identifier for the recorded session.
	SessionID string `json:"session_id,omitempty" yaml:"session_id,omitempty"`

	// Duration is the total duration of the conversation.
	// With encoding/json a time.Duration is written as its integer nanosecond
	// count, not as a string such as "1.5s".
	// NOTE(review): the YAML representation depends on the YAML library in use — confirm.
	Duration time.Duration `json:"duration,omitempty" yaml:"duration,omitempty"`

	// Extras holds any additional metadata from the recording.
	Extras map[string]interface{} `json:"extras,omitempty" yaml:"extras,omitempty"`
}

RecordingMetadata contains metadata extracted from the recording that should flow through to the evaluation context.

type RecordingMetadataFile

// RecordingMetadataFile contains metadata about the recording session.
// It is the type of SessionRecordingFile.Metadata.
type RecordingMetadataFile struct {
	// SessionID is stored under the JSON key "session_id"; always emitted.
	SessionID  string   `json:"session_id"`
	// ProviderID is optional; an empty string is omitted from JSON.
	ProviderID string   `json:"provider_id,omitempty"`
	// Model is optional; an empty string is omitted from JSON.
	Model      string   `json:"model,omitempty"`
	// Tags is optional; an empty slice is omitted from JSON.
	Tags       []string `json:"tags,omitempty"`
}

RecordingMetadataFile contains metadata about the recording session.

type RecordingReference

// RecordingReference is an opaque reference to a single recording. It
// abstracts the underlying storage mechanism (file, database, API, etc.),
// allowing adapters to enumerate and load recordings from various sources.
type RecordingReference struct {
	// ID is a unique identifier for this recording reference.
	// For file-based adapters, this is typically the file path.
	// For database adapters, this could be a record ID.
	ID string `json:"id" yaml:"id"`

	// Source is the original source pattern/query that produced this reference.
	// For file-based adapters, this is the original glob pattern or path.
	Source string `json:"source" yaml:"source"`

	// TypeHint is an optional format indicator (e.g., "session", "arena_output", "transcript").
	TypeHint string `json:"type_hint,omitempty" yaml:"type_hint,omitempty"`

	// Metadata contains adapter-specific metadata about this reference.
	// This can include file size, modification time, database metadata, etc.
	Metadata map[string]interface{} `json:"metadata,omitempty" yaml:"metadata,omitempty"`
}

RecordingReference is an opaque reference to a single recording. It abstracts the underlying storage mechanism (file, database, API, etc.) allowing adapters to enumerate and load recordings from various sources.

func EnumerateFiles

func EnumerateFiles(source, typeHint string) ([]RecordingReference, error)

EnumerateFiles is a helper for file-based adapters to expand glob patterns. It returns recording references for each matching file. If the source doesn't contain glob characters, it returns a single reference.

type Registry

// Registry manages registered recording adapters.
// Its fields are unexported; obtain a value from NewRegistry or
// NewEmptyRegistry and add adapters with Register.
type Registry struct {
	// contains filtered or unexported fields
}

Registry manages registered recording adapters.

func NewEmptyRegistry

func NewEmptyRegistry() *Registry

NewEmptyRegistry creates an empty adapter registry without any built-in adapters. This is useful for testing or when you want to register only specific adapters.

func NewRegistry

func NewRegistry() *Registry

NewRegistry creates a new adapter registry with default adapters registered.

func (*Registry) Enumerate

func (r *Registry) Enumerate(source, typeHint string) ([]RecordingReference, error)

Enumerate expands a source into individual recording references. Uses the first adapter that can handle the source. Returns an error if no adapter can handle the source or if enumeration fails.

func (*Registry) FindAdapter

func (r *Registry) FindAdapter(path, typeHint string) RecordingAdapter

FindAdapter returns the first adapter that can handle the given path and type hint. Returns nil if no adapter can handle the format.

func (*Registry) Load

Load finds an appropriate adapter and loads the recording from a reference. Returns an error if no adapter can handle the format or if loading fails.

func (*Registry) Register

func (r *Registry) Register(adapter RecordingAdapter)

Register adds an adapter to the registry. Adapters are checked in registration order, so register more specific adapters before generic ones.

type SessionRecordingAdapter

// SessionRecordingAdapter loads PromptKit session recordings (*.recording.json).
// The type has no fields, so values carry no per-instance state.
type SessionRecordingAdapter struct{}

SessionRecordingAdapter loads PromptKit session recordings (*.recording.json).

func NewSessionRecordingAdapter

func NewSessionRecordingAdapter() *SessionRecordingAdapter

NewSessionRecordingAdapter creates a new session recording adapter.

func (*SessionRecordingAdapter) CanHandle

func (a *SessionRecordingAdapter) CanHandle(source, typeHint string) bool

CanHandle returns true for *.recording.json files or "session" type hint.

func (*SessionRecordingAdapter) Enumerate

func (a *SessionRecordingAdapter) Enumerate(source string) ([]RecordingReference, error)

Enumerate expands a source into individual recording references. For file-based sources, this expands glob patterns to matching files.

func (*SessionRecordingAdapter) Load

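func (a *SessionRecordingAdapter) Load(ref RecordingReference) ([]types.Message, *RecordingMetadata, error)

Load reads a session recording file and converts it to Arena messages.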

type SessionRecordingFile

// SessionRecordingFile represents the structure of a *.recording.json file.
type SessionRecordingFile struct {
	// Metadata describes the recording session (JSON key "metadata").
	Metadata RecordingMetadataFile `json:"metadata"`
	// Events lists the recorded events (JSON key "events").
	// NOTE(review): presumably in chronological order — confirm.
	Events   []RecordingEvent      `json:"events"`
}

SessionRecordingFile represents the structure of a *.recording.json file.

type TranscriptAdapter

// TranscriptAdapter loads transcript YAML files (*.transcript.yaml).
// The type has no fields, so values carry no per-instance state.
type TranscriptAdapter struct{}

TranscriptAdapter loads transcript YAML files (*.transcript.yaml).

func NewTranscriptAdapter

func NewTranscriptAdapter() *TranscriptAdapter

NewTranscriptAdapter creates a new transcript adapter.

func (*TranscriptAdapter) CanHandle

func (a *TranscriptAdapter) CanHandle(source, typeHint string) bool

CanHandle returns true for *.transcript.yaml files or "transcript" type hint.

func (*TranscriptAdapter) Enumerate

func (a *TranscriptAdapter) Enumerate(source string) ([]RecordingReference, error)

Enumerate expands a source into individual recording references. For file-based sources, this expands glob patterns to matching files.

func (*TranscriptAdapter) Load

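func (a *TranscriptAdapter) Load(ref RecordingReference) ([]types.Message, *RecordingMetadata, error)

Load reads a transcript file and converts it to Arena messages.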

type TranscriptContentPart

// TranscriptContentPart represents a content part in the transcript.
type TranscriptContentPart struct {
	// Type is stored under the YAML key "type".
	// NOTE(review): presumably selects which of Text/Media is populated — confirm.
	Type  string               `yaml:"type"`
	// Text is optional (YAML key "text", omitempty); nil means absent.
	Text  *string              `yaml:"text,omitempty"`
	// Media is optional (YAML key "media", omitempty); nil means absent.
	Media *TranscriptMediaPart `yaml:"media,omitempty"`
}

TranscriptContentPart represents a content part in the transcript.

type TranscriptFile

// TranscriptFile represents the structure of a *.transcript.yaml file.
type TranscriptFile struct {
	// Metadata describes the transcript (YAML key "metadata").
	Metadata TranscriptMetadata  `yaml:"metadata"`
	// Messages lists the transcript's messages (YAML key "messages").
	// NOTE(review): presumably in conversation order — confirm.
	Messages []TranscriptMessage `yaml:"messages"`
}

TranscriptFile represents the structure of a *.transcript.yaml file.

type TranscriptMediaPart

// TranscriptMediaPart represents media content in the transcript.
// It has the same fields as RecordedMediaPart, tagged for YAML instead of
// JSON; every field is tagged omitempty.
// NOTE(review): Data, URI and Path look like alternative ways to locate the
// media; which one takes precedence is not visible here — confirm.
type TranscriptMediaPart struct {
	MIMEType string `yaml:"mime_type,omitempty"`
	Data     string `yaml:"data,omitempty"` // Base64 encoded
	URI      string `yaml:"uri,omitempty"`  // URL
	Path     string `yaml:"path,omitempty"` // File path
	// NOTE(review): unit is presumably bytes — confirm.
	Size     int64  `yaml:"size,omitempty"`
	// NOTE(review): Width and Height are presumably in pixels — confirm.
	Width    int    `yaml:"width,omitempty"`
	Height   int    `yaml:"height,omitempty"`
	Duration int64  `yaml:"duration,omitempty"` // milliseconds
}

TranscriptMediaPart represents media content in the transcript.

type TranscriptMessage

// TranscriptMessage represents a message in the transcript.
type TranscriptMessage struct {
	// Role is stored under the YAML key "role".
	// NOTE(review): the set of valid role values is not visible here — confirm.
	Role       string                  `yaml:"role"`
	// Content is the message text (YAML key "content").
	Content    string                  `yaml:"content"`
	// Name is optional (omitempty).
	Name       string                  `yaml:"name,omitempty"`
	// Timestamp is optional and held as a string, unlike the time.Time
	// timestamps used by ArenaOutputTurn and RecordingEvent.
	// NOTE(review): the expected time layout is not visible here — confirm.
	Timestamp  string                  `yaml:"timestamp,omitempty"`
	// Parts is optional (omitempty).
	Parts      []TranscriptContentPart `yaml:"parts,omitempty"`
	// ToolCalls is optional (omitempty).
	ToolCalls  []TranscriptToolCall    `yaml:"tool_calls,omitempty"`
	// ToolCallID is optional (omitempty).
	// NOTE(review): presumably references the originating
	// TranscriptToolCall.ID on tool-result messages — confirm.
	ToolCallID string                  `yaml:"tool_call_id,omitempty"`
}

TranscriptMessage represents a message in the transcript.

type TranscriptMetadata

// TranscriptMetadata contains metadata about the transcript.
// Every field is optional (omitempty).
type TranscriptMetadata struct {
	// SessionID is stored under the YAML key "session_id".
	SessionID    string                            `yaml:"session_id,omitempty"`
	// Provider is stored under the YAML key "provider".
	Provider     string                            `yaml:"provider,omitempty"`
	// Model is stored under the YAML key "model".
	Model        string                            `yaml:"model,omitempty"`
	// Tags is stored under the YAML key "tags".
	Tags         []string                          `yaml:"tags,omitempty"`
	// JudgeTargets is keyed by string (YAML key "judge_targets").
	// NOTE(review): presumably keyed by judge name, like
	// RecordingMetadata.JudgeTargets — confirm.
	JudgeTargets map[string]TranscriptProviderSpec `yaml:"judge_targets,omitempty"`
}

TranscriptMetadata contains metadata about the transcript.

type TranscriptProviderSpec

// TranscriptProviderSpec describes a provider for judge targets in transcripts.
// Its field names and YAML keys match ProviderSpec, which additionally
// carries JSON tags.
type TranscriptProviderSpec struct {
	// Type is stored under the YAML key "type".
	Type  string `yaml:"type"`
	// Model is stored under the YAML key "model".
	Model string `yaml:"model"`
	// ID is stored under the YAML key "id".
	ID    string `yaml:"id"`
}

TranscriptProviderSpec describes a provider for judge targets in transcripts.

type TranscriptToolCall

// TranscriptToolCall represents a tool call in the transcript.
type TranscriptToolCall struct {
	// ID is stored under the YAML key "id".
	ID       string                     `yaml:"id"`
	// Type is stored under the YAML key "type".
	// NOTE(review): the set of valid values is not visible here — confirm.
	Type     string                     `yaml:"type"`
	// Function holds the called function's name and arguments.
	Function TranscriptToolCallFunction `yaml:"function"`
}

TranscriptToolCall represents a tool call in the transcript.

type TranscriptToolCallFunction

// TranscriptToolCallFunction represents a function call within a tool call.
type TranscriptToolCallFunction struct {
	// Name is stored under the YAML key "name".
	Name      string `yaml:"name"`
	// Arguments is carried as a plain string (YAML key "arguments").
	// NOTE(review): presumably a JSON-encoded argument object — confirm.
	Arguments string `yaml:"arguments"`
}

TranscriptToolCallFunction represents a function call within a tool call.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL