headless

package
v0.1.5 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 23, 2026 License: Apache-2.0 Imports: 26 Imported by: 0

Documentation

Overview

Package headless runs Errata recipe tasks without user interaction.

It iterates over the tasks defined in a recipe, fans each out to all configured model adapters via runner.RunAll, evaluates success criteria, and produces a structured JSON report.

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func RunDirName

func RunDirName(recipeName, reportID string) string

RunDirName returns the directory name for a bundled run output.

func Save

func Save(dir string, report *RunReport) (string, error)

Save writes the report as pretty-printed JSON to dir/report.json. Parent directories are created as needed. It returns the full path of the written file.

func SaveMetadata

func SaveMetadata(dir string, report *MetadataReport) (string, error)

SaveMetadata writes the metadata report as pretty-printed JSON to dir/meta.json. Parent directories are created as needed. It returns the full path of the written file.

Types

type MetaModelResult

// MetaModelResult captures per-model metrics without text or file contents.
//
// Fields tagged omitempty are left out of the JSON encoding when they hold
// their zero value; all other fields are always written, even when zero.
type MetaModelResult struct {
	ModelID           string         `json:"model_id"`
	// NOTE(review): the MS suffix suggests milliseconds — confirm with the producer.
	LatencyMS         int64          `json:"latency_ms"`
	InputTokens       int64          `json:"input_tokens"`
	OutputTokens      int64          `json:"output_tokens"`
	ReasoningTokens   int64          `json:"reasoning_tokens,omitempty"`
	// NOTE(review): the USD suffix suggests US dollars — confirm with the producer.
	CostUSD           float64        `json:"cost_usd"`
	StopReason        string         `json:"stop_reason,omitempty"`
	Steps             int            `json:"steps,omitempty"`
	// NOTE(review): presumably tool name → number of calls; verify against the code that fills it.
	ToolCalls         map[string]int `json:"tool_calls,omitempty"`
	// Only a count is stored here; no file names or contents are part of this type.
	FilesChangedCount int            `json:"files_changed_count"`
	// NOTE(review): presumably empty when the model run succeeded — confirm.
	Error             string         `json:"error,omitempty"`
}

MetaModelResult captures per-model metrics without text or file contents.

type MetaRecipeSnapshot

// MetaRecipeSnapshot captures recipe configuration without the system prompt.
//
// It carries the same fields and JSON keys as RecipeSnapshot except that
// SystemPrompt is absent.
type MetaRecipeSnapshot struct {
	Name            string   `json:"name"`
	Version         int      `json:"version"`
	// Omitted from the JSON encoding when empty.
	Models          []string `json:"models,omitempty"`
	// Always written; a nil slice encodes as JSON null under encoding/json.
	Tasks           []string `json:"tasks"`
	// Omitted from the JSON encoding when empty.
	SuccessCriteria []string `json:"success_criteria,omitempty"`
}

MetaRecipeSnapshot captures recipe configuration without the system prompt.

type MetaTaskResult

// MetaTaskResult captures one task's metrics without sensitive content.
//
// It is the redacted counterpart of TaskResult: the prompt text is replaced
// by a hash and the per-model output is reduced to MetaModelResult metrics.
type MetaTaskResult struct {
	Index           int                          `json:"index"`
	// Hash of the task prompt (BuildMetadataReport is documented as hashing
	// prompts). The hash algorithm is not visible here.
	PromptHash      string                       `json:"prompt_hash"`
	Models          []MetaModelResult            `json:"models"`
	// NOTE(review): the map key is presumably a model ID — verify against the producer.
	CriteriaResults map[string][]criteria.Result `json:"criteria_results,omitempty"`
	SelectedModel   string                       `json:"selected_model,omitempty"`
}

MetaTaskResult captures one task's metrics without sensitive content.

type MetadataReport

// MetadataReport is a shareable, redacted report containing only benchmark
// metrics — no prompts, responses, file contents, or raw events.
//
// It is built from a full RunReport by BuildMetadataReport, written to
// dir/meta.json by SaveMetadata, and read back by LoadMetadata. Unlike
// RunReport it has no Setup field.
type MetadataReport struct {
	ID        string    `json:"id"`
	Timestamp time.Time `json:"timestamp"`
	SessionID string    `json:"session_id"`

	// Recipe is the recipe configuration with the system prompt left out.
	Recipe   MetaRecipeSnapshot `json:"recipe"`
	TaskMode string             `json:"task_mode"`

	// Tasks holds one redacted result per task.
	Tasks []MetaTaskResult `json:"tasks"`

	// Summary uses the same Summary type as the full RunReport.
	Summary Summary `json:"summary"`
}

MetadataReport is a shareable, redacted report containing only benchmark metrics — no prompts, responses, file contents, or raw events.

func BuildMetadataReport

func BuildMetadataReport(full *RunReport) *MetadataReport

BuildMetadataReport constructs a MetadataReport from a full RunReport, hashing prompts and stripping sensitive content.

func LoadMetadata

func LoadMetadata(path string) (*MetadataReport, error)

LoadMetadata reads a MetadataReport JSON file at the given path.

type ModelSummary

// ModelSummary is per-model aggregate across all tasks.
//
// Every field is always written to JSON (none is tagged omitempty).
type ModelSummary struct {
	TasksSucceeded int     `json:"tasks_succeeded"`
	CriteriaPassed int     `json:"criteria_passed"`
	CriteriaTotal  int     `json:"criteria_total"`
	// NOTE(review): the USD suffix suggests US dollars — confirm with the producer.
	TotalCostUSD   float64 `json:"total_cost_usd"`
	// NOTE(review): the MS suffix suggests milliseconds; how the average is
	// computed (e.g. over which tasks) is not visible here — confirm.
	AvgLatencyMS   float64 `json:"avg_latency_ms"`
}

ModelSummary is per-model aggregate across all tasks.

type Options

// Options controls headless execution behaviour.
//
// A pointer to Options is passed to Run. The struct carries no JSON tags.
type Options struct {
	// Recipe supplies the tasks to run.
	Recipe         *recipe.Recipe
	// Adapters are the configured model adapters each task is fanned out to.
	Adapters       []models.ModelAdapter
	SessionID      string
	Cfg            config.Config
	OutputDir      string // directory for output reports (required)
	CheckpointPath string // path for checkpoint file (required)
	Verbose        bool
	JSON           bool // also emit report to stdout

	// DebugLog enables raw API request logging in adapter loops.
	DebugLog bool

	// MCP state.
	// NOTE(review): MCPDispatchers is presumably keyed by tool or server name — confirm.
	MCPDefs        []tools.ToolDef
	MCPDispatchers map[string]tools.MCPDispatcher

	// Stderr is the writer for progress output. nil → os.Stderr.
	Stderr io.Writer
}

Options controls headless execution behaviour.

type RecipeSnapshot

// RecipeSnapshot is a JSON-safe subset of recipe.Recipe for the report.
//
// It is embedded in RunReport under the "recipe" key. Unlike
// MetaRecipeSnapshot it includes the system prompt.
type RecipeSnapshot struct {
	Name            string   `json:"name"`
	Version         int      `json:"version"`
	// Omitted from the JSON encoding when empty.
	Models          []string `json:"models,omitempty"`
	// Omitted from the JSON encoding when empty.
	SystemPrompt    string   `json:"system_prompt,omitempty"`
	// Always written; a nil slice encodes as JSON null under encoding/json.
	Tasks           []string `json:"tasks"`
	// Omitted from the JSON encoding when empty.
	SuccessCriteria []string `json:"success_criteria,omitempty"`
}

RecipeSnapshot is a JSON-safe subset of recipe.Recipe for the report.

type RunReport

// RunReport is the top-level JSON report produced by `errata run`.
//
// Run returns it, Save writes it to dir/report.json, and Load reads it back.
// BuildMetadataReport derives the redacted MetadataReport from it.
type RunReport struct {
	ID        string    `json:"id"`
	Timestamp time.Time `json:"timestamp"`
	SessionID string    `json:"session_id"`

	// Recipe is the recipe configuration, including the system prompt.
	Recipe   RecipeSnapshot `json:"recipe"`
	TaskMode string         `json:"task_mode"`

	// Tasks holds one result per task, including the prompt text.
	Tasks []TaskResult `json:"tasks"`

	Summary Summary   `json:"summary"`
	// Setup records worktree creation metadata for debugging.
	Setup   SetupInfo `json:"setup"`
}

RunReport is the top-level JSON report produced by `errata run`.

func Load

func Load(path string) (*RunReport, error)

Load reads a RunReport JSON file at the given path.

func Run

func Run(ctx context.Context, opts *Options) (*RunReport, error)

Run executes all recipe tasks and returns the headless report.

func (*RunReport) Filename

func (r *RunReport) Filename() string

Filename returns the fixed filename within the bundled run directory.

type SetupInfo

// SetupInfo records worktree creation metadata for debugging.
//
// It appears in RunReport under the "setup" key and has no counterpart in
// MetadataReport.
type SetupInfo struct {
	WorktreeBase string            `json:"worktree_base"`
	// NOTE(review): the MS suffix suggests a duration in milliseconds — confirm.
	SetupMS      int64             `json:"setup_ms"`
	GitMode      bool              `json:"git_mode"`
	// NOTE(review): presumably model ID → that model's working directory; verify against the producer.
	ModelDirs    map[string]string `json:"model_dirs"`
}

SetupInfo records worktree creation metadata for debugging.

type Summary

// Summary aggregates across all tasks.
//
// The same type is used by both RunReport and MetadataReport.
type Summary struct {
	TotalTasks     int                     `json:"total_tasks"`
	CompletedTasks int                     `json:"completed_tasks"`
	// NOTE(review): the USD suffix suggests US dollars — confirm with the producer.
	TotalCostUSD   float64                 `json:"total_cost_usd"`
	// NOTE(review): the map key is presumably a model ID — verify against the producer.
	PerModel       map[string]ModelSummary `json:"per_model"`
}

Summary aggregates across all tasks.

type TaskResult

// TaskResult captures one task's execution and evaluation.
//
// It holds the prompt text, so it belongs to the full RunReport;
// MetaTaskResult is the redacted counterpart.
type TaskResult struct {
	Index           int                          `json:"index"`
	Prompt          string                       `json:"prompt"`
	// Report is a pointer, so a nil value encodes as JSON null.
	Report          *output.Report               `json:"report"`
	// Always written (no omitempty), unlike the same field on MetaTaskResult.
	// NOTE(review): the map key is presumably a model ID — verify against the producer.
	CriteriaResults map[string][]criteria.Result `json:"criteria_results"`
	SelectedModel   string                       `json:"selected_model,omitempty"`
}

TaskResult captures one task's execution and evaluation.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL