Documentation
¶
Index ¶
- Constants
- type BenchmarkCase
- type BenchmarkReport
- type BenchmarkResult
- type BenchmarkSuite
- type CheckpointStore
- type EvalCase
- type EvalResult
- type EvalSuite
- type Options
- type PlanStep
- type ProgressKind
- type RunState
- type RunSummary
- type Runner
- func (r *Runner) Close()
- func (r *Runner) ListRuns(limit int) ([]RunSummary, error)
- func (r *Runner) Resume(ctx context.Context, runID string) (*RunState, error)
- func (r *Runner) RunBenchmark(ctx context.Context, suite BenchmarkSuite) (*BenchmarkReport, error)
- func (r *Runner) RunEval(ctx context.Context, cases []EvalCase) ([]EvalResult, error)
- func (r *Runner) RunGoal(ctx context.Context, goal string) (*RunState, error)
- type Step
- type TurnStats
- type VerificationCheck
Constants ¶
View Source
// Status* are untyped string constants naming the states a run can be in.
// The string values are the wire/JSON representation ("running",
// "completed", "failed", "max_turns_reached", "no_progress_stopped",
// "cancelled").
// NOTE(review): presumably these are the values stored in RunState.Status — confirm against the runner.
const ( StatusRunning = "running" StatusCompleted = "completed" StatusFailed = "failed" StatusMaxTurnsReached = "max_turns_reached" StatusNoProgressStopped = "no_progress_stopped" StatusCancelled = "cancelled" )
View Source
// Phase* are untyped string constants naming the phases of a run:
// "planning", "execution" and "verification".
// NOTE(review): presumably these are the values stored in RunState.Phase — confirm against the runner.
const ( PhasePlanning = "planning" PhaseExecution = "execution" PhaseVerification = "verification" )
View Source
// PlanStatus* are untyped string constants naming plan progress states:
// "pending", "in_progress" and "completed".
// NOTE(review): presumably the status of an individual PlanStep; the PlanStep fields are not shown here — confirm.
const ( PlanStatusPending = "pending" PlanStatusInProgress = "in_progress" PlanStatusCompleted = "completed" )
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type BenchmarkCase ¶
// BenchmarkCase is one entry of BenchmarkSuite.Cases, encoded as a JSON object.
// Name and Goal are always emitted; every other field is dropped from the JSON
// output when it holds its zero value (omitempty).
type BenchmarkCase struct {
Name string `json:"name"`
Goal string `json:"goal"`
// NOTE(review): BenchmarkSuite also has an Iterations field; presumably this
// one overrides the suite-level value for this case — confirm precedence.
Iterations int `json:"iterations,omitempty"`
// NOTE(review): presumably a per-case override of Options.MaxTurns — confirm.
MaxTurns int `json:"max_turns,omitempty"`
// NOTE(review): MustContain / MustNotContain look like required / forbidden
// substrings; what text they are matched against is not shown here — confirm.
MustContain []string `json:"must_contain,omitempty"`
MustNotContain []string `json:"must_not_contain,omitempty"`
// NOTE(review): presumably commands run to verify the outcome (compare
// Options.VerificationCommands) — confirm how they are executed.
VerifyCommands []string `json:"verify_commands,omitempty"`
}
type BenchmarkReport ¶
type BenchmarkResult ¶
// BenchmarkResult holds aggregate figures for one benchmark case, identified
// by CaseName. All fields are always emitted in JSON except FailureReasons,
// which is omitted when empty.
type BenchmarkResult struct {
CaseName string `json:"case_name"`
Iterations int `json:"iterations"`
PassedIterations int `json:"passed_iterations"`
FailedIterations int `json:"failed_iterations"`
// NOTE(review): the scale of PassRate (0–1 fraction vs. 0–100 percent) is not
// shown here — confirm before comparing against thresholds.
PassRate float64 `json:"pass_rate"`
AverageTurns float64 `json:"average_turns"`
AverageToolCalls float64 `json:"average_tool_calls"`
// NOTE(review): presumably milliseconds, per the MS suffix and the
// "average_duration_ms" JSON key — confirm.
AverageDurationMS float64 `json:"average_duration_ms"`
// NOTE(review): presumably one of the Status* constants, taken from the final
// iteration — confirm.
LastStatus string `json:"last_status"`
FailureReasons []string `json:"failure_reasons,omitempty"`
}
type BenchmarkSuite ¶
// BenchmarkSuite is a named collection of benchmark cases. It is the input
// type of Runner.RunBenchmark and the return type of LoadBenchmarkSuite.
type BenchmarkSuite struct {
// Name is omitted from JSON when empty.
Name string `json:"name,omitempty"`
// Iterations is omitted from JSON when zero.
// NOTE(review): presumably the default iteration count for cases that do not
// set BenchmarkCase.Iterations — confirm.
Iterations int `json:"iterations,omitempty"`
// Cases is always emitted; a nil slice encodes as JSON null.
Cases []BenchmarkCase `json:"cases"`
}
func LoadBenchmarkSuite ¶
func LoadBenchmarkSuite(path string) (BenchmarkSuite, error)
type CheckpointStore ¶
// CheckpointStore is an opaque type: all of its fields are unexported. Obtain
// one from NewCheckpointStore(baseDir); its listed methods are Save, which
// takes a *RunState, and List, which returns up to a caller-supplied limit of
// RunSummary values.
// NOTE(review): the on-disk layout under baseDir is not shown here.
type CheckpointStore struct {
// contains filtered or unexported fields
}
func NewCheckpointStore ¶
func NewCheckpointStore(baseDir string) (*CheckpointStore, error)
func (*CheckpointStore) List ¶
func (s *CheckpointStore) List(limit int) ([]RunSummary, error)
func (*CheckpointStore) Save ¶
func (s *CheckpointStore) Save(state *RunState) error
type EvalCase ¶
// EvalCase is the per-case input of Runner.RunEval. It carries the same
// fields as BenchmarkCase minus Iterations and VerifyCommands. Name and Goal
// are always emitted in JSON; the remaining fields are omitted when zero/empty.
type EvalCase struct {
Name string `json:"name"`
Goal string `json:"goal"`
// NOTE(review): presumably a per-case override of Options.MaxTurns — confirm.
MaxTurns int `json:"max_turns,omitempty"`
// NOTE(review): MustContain / MustNotContain look like required / forbidden
// substrings; what text they are matched against is not shown here — confirm.
MustContain []string `json:"must_contain,omitempty"`
MustNotContain []string `json:"must_not_contain,omitempty"`
}
func LoadEvalCases ¶
type EvalResult ¶
type Options ¶
// Options configures a run. A copy is stored on RunState.Options, so every
// field except the two callbacks (tagged json:"-") is part of the serialized
// run state. DefaultOptions returns an Options value.
//
// The time.Duration fields carry no custom tag handling here; with the default
// encoding/json behavior they encode as integer nanoseconds.
// NOTE(review): confirm Options has no custom MarshalJSON elsewhere in the package.
type Options struct {
Workspace string `json:"workspace"`
// Turn and tool-call limits.
// NOTE(review): whether a zero value means "unlimited" or "use default" is
// not shown here — confirm against the runner.
MaxTurns int `json:"max_turns"`
MaxToolCallsPerTurn int `json:"max_tool_calls_per_turn"`
MaxConsecutiveNoToolTurns int `json:"max_consecutive_no_tool_turns"`
MaxConsecutiveInvalidToolArgs int `json:"max_consecutive_invalid_tool_args"`
RequestTimeout time.Duration `json:"request_timeout"`
ToolTimeout time.Duration `json:"tool_timeout"`
// NOTE(review): presumably CompletionMarker is only consulted when
// RequireCompletionMarker is true — confirm.
RequireCompletionMarker bool `json:"require_completion_marker"`
CompletionMarker string `json:"completion_marker"`
EnablePlanning bool `json:"enable_planning"`
PlanMaxSteps int `json:"plan_max_steps"`
RequireVerification bool `json:"require_verification"`
// VerificationCommands is omitted from JSON when empty.
VerificationCommands []string `json:"verification_commands,omitempty"`
VerifyTimeout time.Duration `json:"verify_timeout"`
// Model overrides the LLM model for this run (the model-router seam). Empty
// uses cfg.Model. Agent/orchestrate/subagent callers set this to
// cfg.ResolveAgentModel() so agent work can use a tool-capable/reasoning model.
Model string `json:"model,omitempty"`
// AutoApproveTools runs the permission checker in Trust mode (allow all).
// Set for subagents, which are headless and would otherwise deny every
// write/exec tool ("Ask" with no prompt). Spawning the subagent is the
// approval. Never set for the interactive main agent.
AutoApproveTools bool `json:"auto_approve_tools"`
EmitArtifacts bool `json:"emit_artifacts"`
// ArtifactDir is omitted from JSON when empty.
// NOTE(review): presumably only used when EmitArtifacts is true — confirm.
ArtifactDir string `json:"artifact_dir,omitempty"`
DisableCheckpoints bool `json:"disable_checkpoints"`
Verbose bool `json:"verbose"`
// OnProgress is an optional callback invoked at key agent events.
// text is a human-readable label. turn/maxTurns are 0 for non-turn events.
// This field is not serialised to JSON (func types are not JSON-safe).
OnProgress func(kind ProgressKind, text string, turn, maxTurns int) `json:"-"`
// OnTurnStats is an optional callback fired after each LLM API call completes.
// It carries timing and token usage data for that call.
// Like OnProgress, it is excluded from JSON via the "-" tag.
OnTurnStats func(TurnStats) `json:"-"`
}
func DefaultOptions ¶
func DefaultOptions() Options
type ProgressKind ¶
type ProgressKind int
ProgressKind identifies an agent progress event.
// ProgressKind values, numbered by iota from 0 in declaration order:
// ProgressTurnStart=0, ProgressToolCall=1, ProgressStepDone=2,
// ProgressResponse=3, ProgressComplete=4, ProgressError=5.
// These are the kind argument passed to Options.OnProgress. Inserting or
// reordering entries renumbers every later value.
const ( ProgressTurnStart ProgressKind = iota ProgressToolCall ProgressStepDone ProgressResponse ProgressComplete ProgressError )
type RunState ¶
// RunState is the JSON-serializable state of a single run. It is returned by
// Runner.RunGoal and Runner.Resume and accepted by CheckpointStore.Save.
type RunState struct {
RunID string `json:"run_id"`
Goal string `json:"goal"`
// NOTE(review): presumably one of the Status* constants — confirm.
Status string `json:"status"`
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
// CompletedAt is a pointer so that a nil value is omitted from JSON.
CompletedAt *time.Time `json:"completed_at,omitempty"`
// Counters; all are always emitted, including when zero.
// NOTE(review): the Consecutive* counters presumably pair with the
// Options.MaxConsecutive* limits — confirm.
Turn int `json:"turn"`
ConsecutiveNoToolTurns int `json:"consecutive_no_tool_turns"`
ConsecutiveInvalidToolArgs int `json:"consecutive_invalid_tool_args"`
ToolCallCount int `json:"tool_call_count"`
// Messages uses the ChatMessage type from the tui package.
Messages []tui.ChatMessage `json:"messages"`
Steps []Step `json:"steps"`
// NOTE(review): presumably one of the Phase* constants — confirm.
Phase string `json:"phase"`
Plan []PlanStep `json:"plan,omitempty"`
// ActivePlanStep is omitted from JSON when it is 0 (omitempty on an int), so
// a missing key and an explicit 0 decode to the same value.
// NOTE(review): presumably an index into Plan — confirm whether it is 0- or 1-based.
ActivePlanStep int `json:"active_plan_step,omitempty"`
Verification []VerificationCheck `json:"verification,omitempty"`
LastAssistantResponse string `json:"last_assistant_response,omitempty"`
ArtifactBundlePath string `json:"artifact_bundle_path,omitempty"`
// Error is omitted from JSON when empty.
Error string `json:"error,omitempty"`
// Options is the run configuration, stored by value. Its callback fields are
// tagged json:"-" and therefore are not part of the serialized state.
Options Options `json:"options"`
}
func NewRunState ¶
type RunSummary ¶
type Runner ¶
// Runner is an opaque type: all of its fields are unexported. Its listed
// methods are RunGoal, Resume, RunEval, RunBenchmark, ListRuns and Close.
// Close releases resources held by the runner, so call it when the Runner is
// no longer needed.
type Runner struct {
// contains filtered or unexported fields
}
func (*Runner) Close ¶ added in v1.8.0
func (r *Runner) Close()
Close releases resources held by the runner (e.g. code graph DB).
func (*Runner) RunBenchmark ¶
func (r *Runner) RunBenchmark(ctx context.Context, suite BenchmarkSuite) (*BenchmarkReport, error)
type TurnStats ¶
// TurnStats carries per-turn performance data from a completed LLM call. It is
// the argument type of the Options.OnTurnStats callback. The fields have no
// JSON tags, so default encoding would use the Go field names as keys.
type TurnStats struct {
// NOTE(review): Turn / MaxTurns presumably match the turn / maxTurns values
// reported to Options.OnProgress — confirm whether Turn is 0- or 1-based.
Turn int
MaxTurns int
// NOTE(review): presumably the wall-clock duration of the LLM call — confirm
// what interval is measured.
Elapsed time.Duration
// Token usage for the call.
// NOTE(review): source of the counts (provider-reported vs. estimated) is not shown here.
InputTokens int
OutputTokens int
Response string // full assistant content for this turn (may be empty for pure tool-call turns)
ToolCalls []string // names of tools called this turn
}
TurnStats carries per-turn performance data from a completed LLM call.
Click to show internal directories.
Click to hide internal directories.