harness

package
v0.1.1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 20, 2026 License: MIT Imports: 21 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
// Common mock errors for testing.
var (
	// ErrMockTimeout is a sentinel error whose message reports that a mock
	// harness operation timed out.
	ErrMockTimeout = errors.New("mock harness: operation timed out")
	// ErrMockFailed is a sentinel error whose message reports that mock
	// harness execution failed.
	ErrMockFailed  = errors.New("mock harness: execution failed")
)

ErrMockTimeout and ErrMockFailed are common mock errors for use in tests.

Functions

func BuildPrompt

func BuildPrompt(t *task.Task, workspace string, sameWorkspace bool, prompt string, status *RepoStatus) string

BuildPrompt creates the full prompt with header.

func CanResolveCLI

func CanResolveCLI(name string) bool

CanResolveCLI returns true if subtask can likely invoke the given CLI name on this machine, including common install locations.

Note: this is intentionally "side-effect free" (it does not invoke a shell), so it may return false even if the command would be available via a shell alias.

func FormatRepoStatusWarning

func FormatRepoStatusWarning(baseBranch string, status *RepoStatus) string

Types

type BuiltinMock

// BuiltinMock is a simple mock harness for CLI testing.
type BuiltinMock struct {
	// ToolCalls is a tool-call count.
	// NOTE(review): presumably the number of tool calls Run simulates — confirm
	// against the implementation.
	ToolCalls int
}

BuiltinMock is a simple mock harness for CLI testing.

func (*BuiltinMock) DuplicateSession

func (m *BuiltinMock) DuplicateSession(sessionID, oldCwd, newCwd string) (string, error)

func (*BuiltinMock) MigrateSession

func (m *BuiltinMock) MigrateSession(sessionID, oldCwd, newCwd string) error

func (*BuiltinMock) Review

func (m *BuiltinMock) Review(cwd string, target ReviewTarget, instructions string) (string, error)

func (*BuiltinMock) Run

func (m *BuiltinMock) Run(ctx context.Context, cwd, prompt, continueFrom string, cb Callbacks) (*Result, error)

type Callbacks

// Callbacks holds the hooks for harness events. A Callbacks value is passed
// to Harness.Run as its cb argument.
type Callbacks struct {
	OnSessionStart func(sessionID string) // Called when the session starts (thread.started); receives the session ID
	OnToolCall     func(time.Time)        // Called for each tool call; the argument is presumably the time of the call — TODO confirm
}

Callbacks for harness events.

type ClaudeHarness

// ClaudeHarness implements Harness for the Claude Code CLI.
type ClaudeHarness struct {
	Model          string // NOTE(review): presumably the model name handed to the CLI — confirm
	PermissionMode string // default: bypassPermissions
	Tools          string // optional, maps to --tools
	// contains filtered or unexported fields
}

ClaudeHarness implements Harness for the Claude Code CLI.

func (*ClaudeHarness) DuplicateSession

func (c *ClaudeHarness) DuplicateSession(sessionID, oldCwd, newCwd string) (string, error)

func (*ClaudeHarness) MigrateSession

func (c *ClaudeHarness) MigrateSession(sessionID, oldCwd, newCwd string) error

func (*ClaudeHarness) Review

func (c *ClaudeHarness) Review(cwd string, target ReviewTarget, instructions string) (string, error)

Review runs a code review using the standard Run infrastructure.

func (*ClaudeHarness) Run

func (c *ClaudeHarness) Run(ctx context.Context, cwd, prompt, continueFrom string, cb Callbacks) (*Result, error)

Run executes Claude with the given prompt. Blocks until completion.

type CodexEvent

// CodexEvent represents a JSONL event from codex exec --json.
//
// Every field except Type is tagged omitempty or is a pointer, so a given
// event carries only some of them; the per-field comments name the event
// each one appears in.
type CodexEvent struct {
	Type     string `json:"type"`                // event type discriminator (e.g. thread.started, turn.failed, as named below)
	ThreadID string `json:"thread_id,omitempty"` // in thread.started
	Message  string `json:"message,omitempty"`   // in error event
	// Item is nil when the event has no "item" object.
	Item     *struct {
		ID      string `json:"id,omitempty"`
		Type    string `json:"type,omitempty"` // command_execution, agent_message, reasoning
		Text    string `json:"text,omitempty"`    // NOTE(review): presumably the message/reasoning text — confirm
		Command string `json:"command,omitempty"` // NOTE(review): presumably set for command_execution items — confirm
	} `json:"item,omitempty"`
	// Error is nil when the event has no "error" object.
	Error *struct {
		Message string `json:"message"`
	} `json:"error,omitempty"` // in turn.failed
}

CodexEvent represents a JSONL event from codex exec --json.

type CodexHarness

// CodexHarness implements Harness for the Codex CLI.
type CodexHarness struct {
	Model     string // NOTE(review): presumably the model name handed to the CLI — confirm
	Reasoning string // NOTE(review): presumably a reasoning-effort setting — confirm accepted values
	// contains filtered or unexported fields
}

CodexHarness implements Harness for the Codex CLI.

func (*CodexHarness) DuplicateSession

func (c *CodexHarness) DuplicateSession(sessionID, oldCwd, newCwd string) (string, error)

func (*CodexHarness) MigrateSession

func (c *CodexHarness) MigrateSession(sessionID, oldCwd, newCwd string) error

func (*CodexHarness) Review

func (c *CodexHarness) Review(cwd string, target ReviewTarget, instructions string) (string, error)

Review runs codex exec review using the shared command infrastructure.

func (*CodexHarness) Run

func (c *CodexHarness) Run(ctx context.Context, cwd, prompt, continueFrom string, cb Callbacks) (*Result, error)

Run executes Codex with the given prompt. Blocks until completion.

type DuplicateCall

// DuplicateCall records parameters from a DuplicateSession invocation.
// The first three fields mirror DuplicateSession's sessionID, oldCwd and
// newCwd parameters.
type DuplicateCall struct {
	SessionID string    // the sessionID argument
	OldCWD    string    // the oldCwd argument
	NewCWD    string    // the newCwd argument
	Timestamp time.Time // NOTE(review): presumably when the call was recorded — confirm
}

DuplicateCall records parameters from a DuplicateSession invocation.

type Harness

// Harness is the interface for worker backends.
type Harness interface {
	// Run executes the worker in cwd with the given prompt. Blocks until completion.
	// cb carries the event callbacks (session start, tool calls).
	// NOTE(review): continueFrom is presumably a session ID to resume, with
	// empty meaning a fresh session — confirm against the implementations.
	Run(ctx context.Context, cwd, prompt, continueFrom string, cb Callbacks) (*Result, error)

	// Review runs the harness-specific review command in cwd.
	// target specifies what to review (uncommitted changes, a base branch, or a commit).
	// instructions is optional additional review instructions.
	Review(cwd string, target ReviewTarget, instructions string) (string, error)

	// MigrateSession moves a session from oldCwd to newCwd when a workspace path changes.
	// Returns nil if migration succeeds or is unnecessary (as is the case for Codex).
	MigrateSession(sessionID, oldCwd, newCwd string) error

	// DuplicateSession creates a new session ID that starts with the same history
	// as `sessionID`, but is usable from `newCwd`.
	//
	// The original session must remain unchanged.
	// Returns the new session ID.
	DuplicateSession(sessionID, oldCwd, newCwd string) (string, error)
}

Harness is the interface for worker backends.

func New

func New(cfg *workspace.Config) (Harness, error)

New creates a harness from config.

type MigrateCall

// MigrateCall records parameters from a MigrateSession invocation.
// The first three fields mirror MigrateSession's sessionID, oldCwd and
// newCwd parameters.
type MigrateCall struct {
	SessionID string    // the sessionID argument
	OldCWD    string    // the oldCwd argument
	NewCWD    string    // the newCwd argument
	Timestamp time.Time // NOTE(review): presumably when the call was recorded — confirm
}

MigrateCall records parameters from a MigrateSession invocation.

type MockHarness

// MockHarness implements Harness for testing. Its exported fields fall into
// two groups: configuration that determines what each method returns, and
// call-tracking slices that tests can inspect for assertions.
type MockHarness struct {

	// Configuration for Run behavior
	RunResult   *Result
	RunError    error
	ToolCallN   int           // Number of times to invoke the tool-call callback (Callbacks.OnToolCall)
	ToolCallGap time.Duration // Delay between tool calls

	// Configuration for Review behavior
	ReviewResult string
	ReviewError  error

	// Configuration for DuplicateSession behavior
	DuplicateResult string
	DuplicateError  error

	// Call tracking for assertions
	// NOTE(review): presumably one entry is appended per invocation of the
	// corresponding method — confirm against the implementation.
	RunCalls       []RunCall
	MigrateCalls   []MigrateCall
	DuplicateCalls []DuplicateCall
	ReviewCalls    []ReviewCall
	// contains filtered or unexported fields
}

MockHarness implements Harness for testing.

func NewMockHarness

func NewMockHarness() *MockHarness

NewMockHarness creates a mock with default successful behavior.

func (*MockHarness) DuplicateSession

func (m *MockHarness) DuplicateSession(sessionID, oldCwd, newCwd string) (string, error)

DuplicateSession implements Harness.DuplicateSession.

func (*MockHarness) LastRunCall

func (m *MockHarness) LastRunCall() *RunCall

LastRunCall returns the most recent Run call, or nil if none.

func (*MockHarness) MigrateSession

func (m *MockHarness) MigrateSession(sessionID, oldCwd, newCwd string) error

MigrateSession implements Harness.MigrateSession.

func (*MockHarness) Reset

func (m *MockHarness) Reset()

Reset clears all call tracking.

func (*MockHarness) Review

func (m *MockHarness) Review(cwd string, target ReviewTarget, instructions string) (string, error)

Review implements Harness.Review.

func (*MockHarness) Run

func (m *MockHarness) Run(ctx context.Context, cwd, prompt, continueFrom string, cb Callbacks) (*Result, error)

Run implements Harness.Run with configurable behavior.

func (*MockHarness) RunCallCount

func (m *MockHarness) RunCallCount() int

RunCallCount returns number of Run invocations.

func (*MockHarness) WithDuplicateError

func (m *MockHarness) WithDuplicateError(err error) *MockHarness

WithDuplicateError configures DuplicateSession to return an error.

func (*MockHarness) WithDuplicateResult

func (m *MockHarness) WithDuplicateResult(sessionID string) *MockHarness

WithDuplicateResult configures DuplicateSession to return a specific new session ID.

func (*MockHarness) WithError

func (m *MockHarness) WithError(err error) *MockHarness

WithError configures the mock to return an error.

func (*MockHarness) WithFailedStart

func (m *MockHarness) WithFailedStart(errorMsg string) *MockHarness

WithFailedStart simulates a failure before session started.

func (*MockHarness) WithPartialResult

func (m *MockHarness) WithPartialResult(sessionID, errorMsg string) *MockHarness

WithPartialResult simulates a partial execution (prompt delivered but no reply).

func (*MockHarness) WithResult

func (m *MockHarness) WithResult(reply, sessionID string) *MockHarness

WithResult configures the mock to return specific results.

func (*MockHarness) WithReviewError

func (m *MockHarness) WithReviewError(err error) *MockHarness

WithReviewError configures review to return an error.

func (*MockHarness) WithReviewResult

func (m *MockHarness) WithReviewResult(result string) *MockHarness

WithReviewResult configures the review response.

func (*MockHarness) WithToolCalls

func (m *MockHarness) WithToolCalls(n int) *MockHarness

WithToolCalls configures simulation of N tool calls.

type OpenCodeHarness

// OpenCodeHarness implements Harness for the OpenCode CLI ("opencode").
type OpenCodeHarness struct {
	Model   string // NOTE(review): presumably the model name handed to the CLI — confirm
	Variant string // NOTE(review): meaning not visible here; presumably a model variant selector — confirm
	Agent   string // NOTE(review): presumably the opencode agent to run — confirm
	// contains filtered or unexported fields
}

OpenCodeHarness implements Harness for the OpenCode CLI ("opencode").

func (*OpenCodeHarness) DuplicateSession

func (o *OpenCodeHarness) DuplicateSession(sessionID, oldCwd, newCwd string) (string, error)

func (*OpenCodeHarness) MigrateSession

func (o *OpenCodeHarness) MigrateSession(sessionID, oldCwd, newCwd string) error

func (*OpenCodeHarness) Review

func (o *OpenCodeHarness) Review(cwd string, target ReviewTarget, instructions string) (string, error)

Review runs a code review using the standard Run infrastructure.

func (*OpenCodeHarness) Run

func (o *OpenCodeHarness) Run(ctx context.Context, cwd, prompt, continueFrom string, cb Callbacks) (*Result, error)

type RepoStatus

// RepoStatus carries repository state. BuildPrompt and
// FormatRepoStatusWarning both accept a *RepoStatus.
type RepoStatus struct {
	CommitsBehind int      // NOTE(review): presumably the number of commits behind the base branch — confirm
	ConflictFiles []string // NOTE(review): presumably paths of files with conflicts — confirm
}

type Result

// Result is the output from running a harness.
type Result struct {
	Reply           string // NOTE(review): presumably the agent's reply text — confirm
	SessionID       string // NOTE(review): presumably the ID of the session that ran — confirm
	PromptDelivered bool   // True if session started (thread.started seen)
	AgentReplied    bool   // True if agent sent a message
	Error           string // Non-empty if execution failed
}

Result is the output from running a harness.

type ReviewCall

// ReviewCall records parameters from a Review invocation.
// The first three fields mirror Review's cwd, target and instructions
// parameters.
type ReviewCall struct {
	CWD          string       // the cwd argument
	Target       ReviewTarget // the target argument
	Instructions string       // the instructions argument
	Timestamp    time.Time    // NOTE(review): presumably when the call was recorded — confirm
}

ReviewCall records parameters from a Review invocation.

type ReviewTarget

// ReviewTarget specifies what to review. It is passed to Harness.Review.
type ReviewTarget struct {
	// Exactly one of these should be set:
	Uncommitted bool   // Review staged, unstaged, and untracked changes
	BaseBranch  string // Review changes against this base branch
	Commit      string // Review changes introduced by this commit SHA
}

ReviewTarget specifies what to review.

type RunCall

// RunCall records parameters from a Run invocation.
// CWD, Prompt and ContinueFrom mirror Run's cwd, prompt and continueFrom
// parameters.
type RunCall struct {
	CWD          string    // the cwd argument
	Prompt       string    // the prompt argument
	ContinueFrom string    // the continueFrom argument
	ToolCalls    int       // NOTE(review): presumably the number of tool calls simulated during this Run — confirm
	Timestamp    time.Time // NOTE(review): presumably when the call was recorded — confirm
}

RunCall records parameters from a Run invocation.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL