evaluation

package
v0.1.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 26, 2026 License: MIT Imports: 2 Imported by: 0

Documentation

Overview

Package evaluation provides types for detailed evaluation reports with severity-based findings and recommendations. These types are suited to LLM-as-Judge style reviews, such as PRD and ARB evaluations.

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type ActionItem

// ActionItem is a specific action to take. NextSteps groups these into its
// Immediate and Recommended lists.
type ActionItem struct {
	// Action describes what needs to be done. It is the only field that is
	// always written to JSON; the remaining fields are omitted when empty.
	Action string `json:"action"`

	// Category is the related evaluation category.
	Category string `json:"category,omitempty"`

	// Severity is the related finding severity.
	Severity Severity `json:"severity,omitempty"`

	// Owner suggests who should do this.
	Owner string `json:"owner,omitempty"`

	// Effort estimates work required.
	// NOTE(review): presumably low/medium/high, as documented on
	// Finding.Effort — confirm.
	Effort string `json:"effort,omitempty"`
}

ActionItem is a specific action to take.

type CategoryScore

// CategoryScore represents a score for a single evaluation category,
// together with the justification, evidence, and findings behind it.
type CategoryScore struct {
	// Category is the name/ID of the category.
	Category string `json:"category"`

	// Weight is the category weight (0.0-1.0). The weights of all categories
	// in a report should sum to 1.0.
	Weight float64 `json:"weight"`

	// Score is the category score (0.0-10.0).
	Score float64 `json:"score"`

	// MaxScore is the maximum possible score (default 10.0).
	MaxScore float64 `json:"max_score"`

	// Status is the derived status (pass/warn/fail). ComputeStatus calculates
	// it from the score. ScoreStatus also declares "pending" and
	// "needs_improvement" values.
	Status ScoreStatus `json:"status"`

	// Justification explains why this score was given.
	Justification string `json:"justification"`

	// Evidence provides specific supporting evidence. Omitted from JSON when
	// empty.
	Evidence string `json:"evidence,omitempty"`

	// Findings are issues found in this category. Omitted from JSON when
	// empty.
	Findings []Finding `json:"findings,omitempty"`
}

CategoryScore represents a score for a single evaluation category.

func NewCategoryScore

func NewCategoryScore(category string, weight, score float64, justification string) CategoryScore

NewCategoryScore creates a category score with computed status.

func (*CategoryScore) ComputeStatus

func (c *CategoryScore) ComputeStatus() ScoreStatus

ComputeStatus calculates the status from the score.

func (*CategoryScore) ComputeWeightedScore

func (c *CategoryScore) ComputeWeightedScore() float64

ComputeWeightedScore calculates the weighted contribution of this category.

type Decision

// Decision represents the evaluation decision: the outcome, the reasoning
// behind it, and the finding counts and weighted score it was based on.
type Decision struct {
	// Status is the decision outcome (one of the DecisionStatus constants).
	Status DecisionStatus `json:"status"`

	// Passed indicates if the evaluation passed.
	// NOTE(review): how Passed relates to each Status value (e.g. whether
	// "conditional" counts as passed) is not visible from the declaration —
	// confirm against Evaluate.
	Passed bool `json:"passed"`

	// Rationale explains the decision.
	Rationale string `json:"rationale"`

	// FindingCounts summarizes findings by severity.
	FindingCounts FindingCounts `json:"finding_counts"`

	// WeightedScore is the final weighted score.
	WeightedScore float64 `json:"weighted_score"`
}

Decision represents the evaluation decision.

func Evaluate

func Evaluate(findings []Finding, weightedScore float64, criteria PassCriteria) Decision

Evaluate checks the findings and the weighted score against the pass criteria and returns the resulting Decision.

type DecisionStatus

// DecisionStatus represents the decision outcome. It is a string type, so
// its values appear in JSON as plain strings.
type DecisionStatus string

DecisionStatus represents the decision outcome.

// Possible DecisionStatus values. The string on the right is the value
// written to the "status" field of a serialized Decision.
const (
	DecisionPass        DecisionStatus = "pass"         // Meets all criteria.
	DecisionConditional DecisionStatus = "conditional"  // Meets the score requirement but has findings.
	DecisionFail        DecisionStatus = "fail"         // Has blocking findings.
	DecisionHumanReview DecisionStatus = "human_review" // Requires human judgment.
)

type EvaluationReport

// EvaluationReport is the detailed evaluation report for LLM-as-Judge
// reviews. It combines per-category scores, findings, the pass criteria, and
// the fields derived from them (weighted score, decision, next steps,
// summary).
type EvaluationReport struct {
	// Schema is the JSON Schema URL. It is serialized under the "$schema" key
	// and omitted when empty.
	Schema string `json:"$schema,omitempty"`

	// Metadata contains report identification and audit info.
	Metadata ReportMetadata `json:"metadata"`

	// ReviewType identifies the type of review (prd, arb, security, etc.).
	ReviewType string `json:"review_type"`

	// Categories contains scores for each evaluation dimension.
	Categories []CategoryScore `json:"categories"`

	// Findings are all issues discovered during evaluation.
	// NOTE(review): CategoryScore also carries per-category Findings; whether
	// those are expected to be repeated here is not visible from the
	// declarations — confirm.
	Findings []Finding `json:"findings"`

	// WeightedScore is the overall weighted score (see ComputeWeightedScore).
	WeightedScore float64 `json:"weighted_score"`

	// PassCriteria defines the requirements for approval.
	PassCriteria PassCriteria `json:"pass_criteria"`

	// Decision is the evaluation outcome (see Evaluate).
	Decision Decision `json:"decision"`

	// NextSteps provides actionable guidance (see GenerateNextSteps).
	NextSteps NextSteps `json:"next_steps"`

	// Summary is the overall assessment (see GenerateSummary).
	Summary string `json:"summary"`
}

EvaluationReport is the detailed evaluation report for LLM-as-Judge reviews.

func NewEvaluationReport

func NewEvaluationReport(reviewType, document string) *EvaluationReport

NewEvaluationReport creates a new evaluation report.

func (*EvaluationReport) AddCategory

func (r *EvaluationReport) AddCategory(cs CategoryScore)

AddCategory adds a category score.

func (*EvaluationReport) AddFinding

func (r *EvaluationReport) AddFinding(f Finding)

AddFinding adds a finding.

func (*EvaluationReport) ComputeWeightedScore

func (r *EvaluationReport) ComputeWeightedScore() float64

ComputeWeightedScore calculates the overall weighted score.

func (*EvaluationReport) Evaluate

func (r *EvaluationReport) Evaluate() Decision

Evaluate computes the decision based on findings and score.

func (*EvaluationReport) Finalize

func (r *EvaluationReport) Finalize(rerunCommand string)

Finalize computes all derived fields.

func (*EvaluationReport) GenerateNextSteps

func (r *EvaluationReport) GenerateNextSteps(rerunCommand string)

GenerateNextSteps creates actionable next steps.

func (*EvaluationReport) GenerateSummary

func (r *EvaluationReport) GenerateSummary() string

GenerateSummary creates the summary text.

type Finding

// Finding represents an issue discovered during evaluation. ID through
// Recommendation are always written to JSON; Evidence, Owner, and Effort are
// omitted when empty.
type Finding struct {
	// ID is the unique identifier for this finding.
	ID string `json:"id"`

	// Category is the evaluation category this relates to.
	Category string `json:"category"`

	// Severity indicates the impact level (one of the Severity constants).
	Severity Severity `json:"severity"`

	// Title is a brief summary of the finding.
	Title string `json:"title"`

	// Description provides detailed explanation.
	Description string `json:"description"`

	// Recommendation explains how to fix the issue.
	Recommendation string `json:"recommendation"`

	// Evidence provides specific examples or references.
	Evidence string `json:"evidence,omitempty"`

	// Owner suggests who should address this finding.
	Owner string `json:"owner,omitempty"`

	// Effort estimates the work required (low, medium, high).
	Effort string `json:"effort,omitempty"`
}

Finding represents an issue discovered during evaluation.

func (*Finding) IsBlocking

func (f *Finding) IsBlocking() bool

IsBlocking returns true if this finding blocks approval.

type FindingCounts

// FindingCounts tracks the number of findings by severity. All fields are
// always written to JSON, including zero counts.
type FindingCounts struct {
	// Critical is the number of critical-severity findings.
	Critical int `json:"critical"`
	// High is the number of high-severity findings.
	High     int `json:"high"`
	// Medium is the number of medium-severity findings.
	Medium   int `json:"medium"`
	// Low is the number of low-severity findings.
	Low      int `json:"low"`
	// Info is the number of informational findings.
	Info     int `json:"info"`
	// Total is the overall number of findings.
	// NOTE(review): presumably the sum of the fields above — confirm against
	// CountFindings.
	Total    int `json:"total"`
}

FindingCounts tracks the number of findings by severity.

func CountFindings

func CountFindings(findings []Finding) FindingCounts

CountFindings counts findings by severity.

func (FindingCounts) BlockingCount

func (c FindingCounts) BlockingCount() int

BlockingCount returns the number of blocking findings.

func (FindingCounts) HasBlocking

func (c FindingCounts) HasBlocking() bool

HasBlocking returns true if there are any blocking findings.

type NextSteps

// NextSteps provides actionable workflow guidance: how to re-run the
// evaluation and which actions to take, split into blocking and suggested
// items.
type NextSteps struct {
	// RerunCommand is the command to re-run evaluation.
	RerunCommand string `json:"rerun_command"`

	// Immediate are blocking actions that must be completed. Omitted from
	// JSON when empty.
	Immediate []ActionItem `json:"immediate,omitempty"`

	// Recommended are suggested improvements. Omitted from JSON when empty.
	Recommended []ActionItem `json:"recommended,omitempty"`
}

NextSteps provides actionable workflow guidance.

type PassCriteria

// PassCriteria defines the requirements for approval: upper bounds on the
// number of findings per severity and a lower bound on the weighted score.
type PassCriteria struct {
	// MaxCritical is the maximum allowed critical findings (default 0).
	MaxCritical int `json:"max_critical"`

	// MaxHigh is the maximum allowed high severity findings (default 0).
	MaxHigh int `json:"max_high"`

	// MaxMedium is the maximum allowed medium findings (-1 = unlimited).
	// NOTE(review): because of omitempty, a value of 0 ("no medium findings
	// allowed") is dropped from the JSON output. A consumer that fills in
	// defaults for missing keys could then read it back as a different limit
	// — confirm this is intended.
	MaxMedium int `json:"max_medium,omitempty"`

	// MinScore is the minimum weighted score required.
	MinScore float64 `json:"min_score"`
}

PassCriteria defines the requirements for approval.

func DefaultPassCriteria

func DefaultPassCriteria() PassCriteria

DefaultPassCriteria returns the standard pass criteria: zero Critical and High findings allowed, and a minimum score of 7.0.

func StrictPassCriteria

func StrictPassCriteria() PassCriteria

StrictPassCriteria returns the strict pass criteria: zero Critical and High findings allowed, at most 3 Medium findings, and a minimum score of 8.0.

type ReportMetadata

// ReportMetadata contains report identification: which document was
// evaluated, when the report was generated, and by what or whom. Only
// Document and GeneratedAt are always written to JSON.
type ReportMetadata struct {
	// Document is the filename or path being evaluated.
	Document string `json:"document"`

	// DocumentID is the document identifier (e.g., PRD ID).
	DocumentID string `json:"document_id,omitempty"`

	// DocumentTitle is the document title.
	DocumentTitle string `json:"document_title,omitempty"`

	// DocumentVersion is the document version.
	DocumentVersion string `json:"document_version,omitempty"`

	// GeneratedAt is when the report was created.
	GeneratedAt time.Time `json:"generated_at"`

	// GeneratedBy identifies what created this report.
	GeneratedBy string `json:"generated_by,omitempty"`

	// ReviewerID identifies the reviewer (agent or human).
	ReviewerID string `json:"reviewer_id,omitempty"`
}

ReportMetadata contains report identification and audit info.

type ScoreStatus

// ScoreStatus represents the pass/warn/fail status for a category score. It
// is a string type, so its values appear in JSON as plain strings.
type ScoreStatus string

ScoreStatus represents the pass/warn/fail status for a category score.

// Possible ScoreStatus values.
//
// NOTE(review): the thresholds below are stated on the default 0-10 scale;
// whether ComputeStatus adjusts them for a non-default MaxScore is not
// visible here. The last two constants use a Category prefix rather than
// ScoreStatus; they are exported, so renaming them would break callers.
const (
	ScoreStatusPass          ScoreStatus = "pass"              // Score >= 7.0
	ScoreStatusWarn          ScoreStatus = "warn"              // Score >= 5.0 && < 7.0
	ScoreStatusFail          ScoreStatus = "fail"              // Score < 5.0
	CategoryPending          ScoreStatus = "pending"           // Not yet evaluated
	CategoryNeedsImprovement ScoreStatus = "needs_improvement" // Requires attention
)

func (ScoreStatus) Icon

func (s ScoreStatus) Icon() string

Icon returns the emoji icon for the score status.

type Severity

// Severity represents the severity level of a finding. Based on InfoSec
// severity classifications. It is a string type, so its values appear in
// JSON as plain strings.
type Severity string

Severity represents the severity level of a finding. Based on InfoSec severity classifications.

// Severity levels, listed from most to least severe. Per the notes below,
// critical and high are the levels that block approval.
const (
	SeverityCritical Severity = "critical" // Blocks approval, must fix
	SeverityHigh     Severity = "high"     // Blocks approval, must fix
	SeverityMedium   Severity = "medium"   // Should fix before approval
	SeverityLow      Severity = "low"      // Nice to fix
	SeverityInfo     Severity = "info"     // Informational only
)

func AllSeverities

func AllSeverities() []Severity

AllSeverities returns all severity levels in order of severity.

func (Severity) Icon

func (s Severity) Icon() string

Icon returns the emoji icon for the severity.

func (Severity) IsBlocking

func (s Severity) IsBlocking() bool

IsBlocking returns true if this severity blocks approval.

func (Severity) Weight

func (s Severity) Weight() int

Weight returns a numeric weight for sorting (higher = more severe).

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL