benchscore

package
v0.10.15 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: May 8, 2026 | License: MIT | Imports: 9 | Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type Report

// Report is the value ScoreTaskResultsJSONL returns alongside its error. It
// pairs one aggregate Summary with a slice of per-task TaskMetrics, encoded
// under the JSON keys "summary" and "tasks" respectively.
type Report struct {
	Summary Summary       `json:"summary"`
	Tasks   []TaskMetrics `json:"tasks"`
}

func ScoreTaskResultsJSONL

// ScoreTaskResultsJSONL takes a path and returns a Report together with an
// error. Only the signature is visible here.
// NOTE(review): going by the name, path presumably points at a JSONL file of
// TaskResult records — confirm against the implementation.
func ScoreTaskResultsJSONL(path string) (Report, error)

type Step

// Step is a single element of Trajectory.Steps. It carries a Source string, a
// Message string, and the ToolCalls attached to the step.
// NOTE(review): Source presumably identifies who produced the step; the set
// of accepted values is not visible here — confirm.
type Step struct {
	Source    string     `json:"source"`
	Message   string     `json:"message"`
	ToolCalls []ToolCall `json:"tool_calls"`
}

type Summary

// Summary is the aggregate section of a Report (the "summary" JSON key). It
// holds integer counters for tasks, clarification trials, bash calls, bash
// anti-patterns, and structured edit calls, grouped with float64 rate fields.
// NOTE(review): each rate is presumably the quotient of the counters next to
// it (for example ResolvedTasks over TotalTasks); the formulas and the
// zero-denominator behavior are not visible here — confirm in the scorer.
type Summary struct {
	TotalTasks                   int     `json:"total_tasks"`
	ResolvedTasks                int     `json:"resolved_tasks"`
	ResolvedTaskRate             float64 `json:"resolved_task_rate"`
	ClarificationTrials          int     `json:"clarification_trials"`
	ClarificationQuestionRate    float64 `json:"clarification_question_rate"`
	TotalBashCalls               int     `json:"total_bash_calls"`
	TotalBashAntiPatterns        int     `json:"total_bash_anti_patterns"`
	ShellAntiPatternRate         float64 `json:"shell_anti_pattern_rate"`
	TotalStructuredEditCalls     int     `json:"total_structured_edit_calls"`
	TotalStructuredEditSuccesses int     `json:"total_structured_edit_successes"`
	StructuredEditSuccessRate    float64 `json:"structured_edit_success_rate"`
}

type TaskMetrics

// TaskMetrics is the per-task entry of Report.Tasks. It records the task ID,
// two boolean flags (Resolved and ClarificationDetected), and bash and
// structured-edit counters with a float64 rate for each.
//
// StructuredEditRate is serialized as "structured_edit_success_rate", the same
// key Summary uses for its StructuredEditSuccessRate field, so the Go field
// name and the JSON key do not match word for word.
// NOTE(review): how the two rates are computed from the counters is not
// visible here — confirm in the scorer.
type TaskMetrics struct {
	TaskID                  string  `json:"task_id"`
	Resolved                bool    `json:"resolved"`
	ClarificationDetected   bool    `json:"clarification_detected"`
	BashCalls               int     `json:"bash_calls"`
	BashAntiPatterns        int     `json:"bash_anti_patterns"`
	ShellAntiPatternRate    float64 `json:"shell_anti_pattern_rate"`
	StructuredEditCalls     int     `json:"structured_edit_calls"`
	StructuredEditSuccesses int     `json:"structured_edit_successes"`
	StructuredEditRate      float64 `json:"structured_edit_success_rate"`
}

type TaskResult

// TaskResult describes one task outcome: its ID, a reward, and the name of a
// trajectory file.
//
// Reward is a *string, so under encoding/json's default decoding a null or
// absent "reward" leaves it nil, and a present value must be a JSON string.
// NOTE(review): presumably one TaskResult is decoded per input line of
// ScoreTaskResultsJSONL and TrajectoryFile locates a serialized Trajectory —
// confirm against the implementation.
type TaskResult struct {
	TaskID         string  `json:"task_id"`
	Reward         *string `json:"reward"`
	TrajectoryFile string  `json:"trajectory_file"`
}

type ToolCall

// ToolCall is one entry of Step.ToolCalls: the tool's Name, its Arguments, a
// Result string, and an Error string.
//
// Arguments is a json.RawMessage, so the argument payload is kept as raw JSON
// bytes rather than decoded into a fixed Go type. Error carries omitempty and
// is therefore left out of encoded output when it is the empty string.
type ToolCall struct {
	Name      string          `json:"name"`
	Arguments json.RawMessage `json:"arguments"`
	Result    string          `json:"result"`
	Error     string          `json:"error,omitempty"`
}

type Trajectory

// Trajectory is a sequence of Steps stored under the "steps" JSON key.
type Trajectory struct {
	Steps []Step `json:"steps"`
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL