Versions in this module Expand all Collapse all v0 v0.4.1 May 2, 2026 v0.4.0 May 1, 2026 Changes in this version + var NewEvalProvider func(dispatcher AgentDispatcher, emitter BusEmitter) *EvalProvider + var NowISO func() string + func RegisterEvalTools(server *mcp.Server, provider *EvalProvider) + type AgentDispatcher interface + DispatchToHarness func(ctx context.Context, req DispatchRequest) (*DispatchBatchResult, error) + type BusEmitter interface + EmitCogBlock func(ctx context.Context, channelName string, block any) error + type BusEvent struct + BusID string + From string + Hash string + Payload map[string]interface{} + Seq int + Ts string + Type string + V int + type BusReader interface + ReadChannel func(ctx context.Context, channelName string, since string) ([]BusEvent, error) + type Case struct + MaxTokens int + Name string + Prompt string + Rubric Rubric + SystemPrompt string + Tags []string + type Delta struct + BaselineKey string + BaselinePassRate *float64 + Delta float64 + TaskDeltas map[string]*float64 + VariantKey string + VariantPassRate *float64 + type DispatchBatchResult struct + Notes []string + Results []DispatchResult + TotalDurationSec float64 + type DispatchRequest struct + AgentID string + Model string + N int + SystemPrompt string + Task string + Thinking *bool + TimeoutSeconds int + Tools []string + type DispatchResult struct + Content string + DurationSec float64 + Error string + Index int + ModelUsed string + Success bool + ToolCalls []DispatchToolCallSummary + Turns int + type DispatchScoredResult struct + func NewDispatchScoredResult(r DispatchResult, toolCalls []string) *DispatchScoredResult + func (d *DispatchScoredResult) Content() string + func (d *DispatchScoredResult) FinishReason() string + func (d *DispatchScoredResult) ToolCallNames() []string + type DispatchToolCallSummary struct + ArgsDigest string + Error string + Name string + ResultDigest string + type EvalActionType string + const EvalActionRefreshBaseline + const EvalActionRetryRegression + const EvalActionRun + const EvalActionRunIncremental + const EvalActionSkip + type EvalConfig struct + BaselinePins map[string]string + Experiments map[string]*Experiment + TournamentRoot string + type EvalLiveState struct + FetchedAt string + Scorecards map[string]*Scorecard + Trials []TrialRecord + type EvalPlanDetail struct + EvalAction EvalActionType + ExperimentID string + RegressionCells [][2]string + StaleAfter string + TrialSpecs []TrialSpec + type EvalProvider struct + func New(dispatcher AgentDispatcher, emitter BusEmitter) *EvalProvider + func NewWithReader(dispatcher AgentDispatcher, emitter BusEmitter, busReader BusReader) *EvalProvider + func (e *EvalProvider) ApplyPlan(ctx context.Context, plan *reconcile.Plan) ([]reconcile.Result, error) + func (e *EvalProvider) BuildState(config any, live any, existing *reconcile.State) (*reconcile.State, error) + func (e *EvalProvider) ComputePlan(config any, live any, state *reconcile.State) (*reconcile.Plan, error) + func (e *EvalProvider) FetchLive(ctx context.Context, config any) (any, error) + func (e *EvalProvider) Health() reconcile.ResourceStatus + func (e *EvalProvider) LoadConfig(root string) (any, error) + func (e *EvalProvider) Type() string + type EvalProviderState struct + CircuitBreakerThreshold int + InFlightTrialIDs []string + LastReconcileAt string + RecentFailureCounts map[string]int + type Experiment struct + AutoReconcile bool + BaselinePinned string + BaselineVariant string + ID string + Tags []string + Target string + TaskIDs []string + Title string + VariantAxes map[string][]string + type FileBusReader struct + func NewFileBusReader(eventsPath string) *FileBusReader + func (r *FileBusReader) ReadChannel(ctx context.Context, channelName string, since string) ([]BusEvent, error) + type HTTPBusReader struct + func NewHTTPBusReader(kernelURL string) *HTTPBusReader + func (r *HTTPBusReader) ReadChannel(ctx context.Context, channelName string, since string) ([]BusEvent, error) + type Rubric struct + ContentContains []string + ContentContainsCI []string + ContentMustNotContain []string + ContentMustNotContainCI []string + ExpectedTools []string + ExpectedToolsAnyOf []string + FirstToolOneOf []string + ForbiddenTools []string + type RunSummary struct + EndedAt string + ExperimentID string + Failed int + Model string + Passed int + RunID string + StartedAt string + Target string + Total int + type Scorecard struct + Cells map[[2]string]ScorecardCell + ExperimentID string + TaskIDs []string + VariantKeys []string + type ScorecardCell = *bool + type ScoredResult interface + Content func() string + FinishReason func() string + ToolCallNames func() []string + type ToolCallRecord struct + ArgsDigest string + Error string + Name string + ResultDigest string + type TrialRecord struct + CogBlockHash string + Content string + DurationSec float64 + ExperimentID string + Failures []string + Model string + Notes []string + Passed bool + Reasoning string + TDWired bool + Target string + TaskID string + Timestamp string + ToolCalls []ToolCallRecord + TrialID string + VariantIDs map[string]string + type TrialSpec struct + ExperimentID string + SystemPromptVariant *Variant + Target string + TaskVariant Variant + ToolDescriptionVariant *Variant + TrialID string + VariantIDs map[string]string + type Variant struct + Ablation string + BaselineOf string + Class VariantClass + Content any + ID string + SourcePath string + Tags []string + type VariantClass string + const VariantClassExperiment + const VariantClassSystemPrompt + const VariantClassTask + const VariantClassToolDescription + type Verdict struct + Failures []string + Notes []string + Passed bool + func Score(rubric Rubric, result ScoredResult) Verdict