Documentation
¶
Overview ¶
Package evaluation provides types for detailed evaluation reports with severity-based findings and recommendations. It is suited to LLM-as-Judge style reviews such as PRD and ARB evaluations.
Index ¶
- type ActionItem
- type CategoryScore
- type Decision
- type DecisionStatus
- type EvaluationReport
- func (r *EvaluationReport) AddCategory(cs CategoryScore)
- func (r *EvaluationReport) AddFinding(f Finding)
- func (r *EvaluationReport) ComputeWeightedScore() float64
- func (r *EvaluationReport) Evaluate() Decision
- func (r *EvaluationReport) Finalize(rerunCommand string)
- func (r *EvaluationReport) GenerateNextSteps(rerunCommand string)
- func (r *EvaluationReport) GenerateSummary() string
- type Finding
- type FindingCounts
- type NextSteps
- type PassCriteria
- type ReportMetadata
- type ScoreStatus
- type Severity
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type ActionItem ¶
// ActionItem describes a single follow-up action, optionally tagged with the
// category, severity, owner, and effort it relates to.
type ActionItem struct {
// Action describes what needs to be done.
Action string `json:"action"`
// Category is the related evaluation category; omitted from JSON when empty.
Category string `json:"category,omitempty"`
// Severity is the severity of the related finding; omitted from JSON when empty.
Severity Severity `json:"severity,omitempty"`
// Owner suggests who should carry out the action; omitted from JSON when empty.
Owner string `json:"owner,omitempty"`
// Effort estimates the work required; omitted from JSON when empty.
Effort string `json:"effort,omitempty"`
}
ActionItem is a specific action to take.
type CategoryScore ¶
// CategoryScore records the weight, score, derived status, and supporting
// rationale for a single evaluation category.
type CategoryScore struct {
// Category is the name/ID of the category.
Category string `json:"category"`
// Weight is the category weight (0.0-1.0). Weights should sum to 1.0,
// presumably across all categories of one report.
Weight float64 `json:"weight"`
// Score is the category score (0.0-10.0).
Score float64 `json:"score"`
// MaxScore is the maximum possible score (default 10.0).
MaxScore float64 `json:"max_score"`
// Status is the derived status (pass/warn/fail).
Status ScoreStatus `json:"status"`
// Justification explains why this score was given.
Justification string `json:"justification"`
// Evidence provides specific supporting evidence; omitted from JSON when empty.
Evidence string `json:"evidence,omitempty"`
// Findings are issues found in this category; omitted from JSON when empty.
Findings []Finding `json:"findings,omitempty"`
}
CategoryScore represents a score for a single evaluation category.
func NewCategoryScore ¶
func NewCategoryScore(category string, weight, score float64, justification string) CategoryScore
NewCategoryScore creates a category score with computed status.
func (*CategoryScore) ComputeStatus ¶
func (c *CategoryScore) ComputeStatus() ScoreStatus
ComputeStatus calculates the status from the score.
func (*CategoryScore) ComputeWeightedScore ¶
func (c *CategoryScore) ComputeWeightedScore() float64
ComputeWeightedScore calculates the weighted contribution of this category.
type Decision ¶
// Decision captures the outcome of an evaluation: its status, whether it
// passed, the rationale, and the finding counts and weighted score behind it.
type Decision struct {
// Status is the decision outcome.
Status DecisionStatus `json:"status"`
// Passed indicates if the evaluation passed.
Passed bool `json:"passed"`
// Rationale explains the decision.
Rationale string `json:"rationale"`
// FindingCounts summarizes findings by severity.
FindingCounts FindingCounts `json:"finding_counts"`
// WeightedScore is the final weighted score.
WeightedScore float64 `json:"weighted_score"`
}
Decision represents the evaluation decision.
type DecisionStatus ¶
// DecisionStatus represents the decision outcome.
//
// It is a string type, so values serialize as the literal strings listed in
// the constant group below.
type DecisionStatus string

// Decision outcomes. Each constant sits on its own line so that its trailing
// comment cannot swallow the declarations that follow it.
const (
	DecisionPass        DecisionStatus = "pass"         // Meets all criteria
	DecisionConditional DecisionStatus = "conditional"  // Meets score but has findings
	DecisionFail        DecisionStatus = "fail"         // Has blocking findings
	DecisionHumanReview DecisionStatus = "human_review" // Requires human judgment
)
type EvaluationReport ¶
// EvaluationReport is the detailed evaluation report for LLM-as-Judge reviews.
// It bundles report metadata, per-category scores, findings, the pass
// criteria, and the derived decision, next steps, and summary.
type EvaluationReport struct {
// Schema is the JSON Schema URL; serialized under the "$schema" key and
// omitted when empty.
Schema string `json:"$schema,omitempty"`
// Metadata contains report identification and audit info.
Metadata ReportMetadata `json:"metadata"`
// ReviewType identifies the type of review (prd, arb, security, etc.).
ReviewType string `json:"review_type"`
// Categories contains scores for each evaluation dimension.
// NOTE(review): no omitempty, so a nil slice marshals as JSON null —
// confirm whether the constructor initializes it.
Categories []CategoryScore `json:"categories"`
// Findings are all issues discovered during evaluation.
// NOTE(review): no omitempty, so a nil slice marshals as JSON null —
// confirm whether the constructor initializes it.
Findings []Finding `json:"findings"`
// WeightedScore is the overall weighted score.
WeightedScore float64 `json:"weighted_score"`
// PassCriteria defines the requirements for approval.
PassCriteria PassCriteria `json:"pass_criteria"`
// Decision is the evaluation outcome.
Decision Decision `json:"decision"`
// NextSteps provides actionable guidance.
NextSteps NextSteps `json:"next_steps"`
// Summary is the overall assessment.
Summary string `json:"summary"`
}
EvaluationReport is the detailed evaluation report for LLM-as-Judge reviews.
func NewEvaluationReport ¶
func NewEvaluationReport(reviewType, document string) *EvaluationReport
NewEvaluationReport creates a new evaluation report.
func (*EvaluationReport) AddCategory ¶
func (r *EvaluationReport) AddCategory(cs CategoryScore)
AddCategory adds a category score.
func (*EvaluationReport) AddFinding ¶
func (r *EvaluationReport) AddFinding(f Finding)
AddFinding adds a finding.
func (*EvaluationReport) ComputeWeightedScore ¶
func (r *EvaluationReport) ComputeWeightedScore() float64
ComputeWeightedScore calculates the overall weighted score.
func (*EvaluationReport) Evaluate ¶
func (r *EvaluationReport) Evaluate() Decision
Evaluate computes the decision based on findings and score.
func (*EvaluationReport) Finalize ¶
func (r *EvaluationReport) Finalize(rerunCommand string)
Finalize computes all derived fields.
func (*EvaluationReport) GenerateNextSteps ¶
func (r *EvaluationReport) GenerateNextSteps(rerunCommand string)
GenerateNextSteps creates actionable next steps.
func (*EvaluationReport) GenerateSummary ¶
func (r *EvaluationReport) GenerateSummary() string
GenerateSummary creates the summary text.
type Finding ¶
// Finding represents an issue discovered during evaluation, together with
// its severity, a recommendation, and optional evidence and ownership hints.
type Finding struct {
// ID is the unique identifier for this finding.
ID string `json:"id"`
// Category is the evaluation category this relates to.
Category string `json:"category"`
// Severity indicates the impact level.
Severity Severity `json:"severity"`
// Title is a brief summary of the finding.
Title string `json:"title"`
// Description provides detailed explanation.
Description string `json:"description"`
// Recommendation explains how to fix the issue.
Recommendation string `json:"recommendation"`
// Evidence provides specific examples or references; omitted from JSON when empty.
Evidence string `json:"evidence,omitempty"`
// Owner suggests who should address this finding; omitted from JSON when empty.
Owner string `json:"owner,omitempty"`
// Effort estimates the work required (low, medium, high); omitted from JSON when empty.
Effort string `json:"effort,omitempty"`
}
Finding represents an issue discovered during evaluation.
func (*Finding) IsBlocking ¶
IsBlocking returns true if this finding blocks approval.
type FindingCounts ¶
// FindingCounts tracks the number of findings by severity.
type FindingCounts struct {
// Critical is the number of critical-severity findings.
Critical int `json:"critical"`
// High is the number of high-severity findings.
High int `json:"high"`
// Medium is the number of medium-severity findings.
Medium int `json:"medium"`
// Low is the number of low-severity findings.
Low int `json:"low"`
// Info is the number of informational findings.
Info int `json:"info"`
// Total is the overall number of findings.
// NOTE(review): presumably the sum of the per-severity counts — confirm
// against CountFindings.
Total int `json:"total"`
}
FindingCounts tracks the number of findings by severity.
func CountFindings ¶
func CountFindings(findings []Finding) FindingCounts
CountFindings counts findings by severity.
func (FindingCounts) BlockingCount ¶
func (c FindingCounts) BlockingCount() int
BlockingCount returns the number of blocking findings.
func (FindingCounts) HasBlocking ¶
func (c FindingCounts) HasBlocking() bool
HasBlocking returns true if there are any blocking findings.
type NextSteps ¶
// NextSteps provides actionable workflow guidance: the command to re-run the
// evaluation plus blocking and recommended action items.
type NextSteps struct {
// RerunCommand is the command to re-run evaluation. It has no omitempty
// tag, so it is always present in the JSON output, even when empty.
RerunCommand string `json:"rerun_command"`
// Immediate are blocking actions that must be completed; omitted from JSON when empty.
Immediate []ActionItem `json:"immediate,omitempty"`
// Recommended are suggested improvements; omitted from JSON when empty.
Recommended []ActionItem `json:"recommended,omitempty"`
}
NextSteps provides actionable workflow guidance.
type PassCriteria ¶
// PassCriteria defines the requirements for approval: per-severity finding
// limits and a minimum weighted score.
type PassCriteria struct {
// MaxCritical is the maximum allowed critical findings (default 0).
MaxCritical int `json:"max_critical"`
// MaxHigh is the maximum allowed high severity findings (default 0).
MaxHigh int `json:"max_high"`
// MaxMedium is the maximum allowed medium findings (-1 = unlimited).
// NOTE(review): because of omitempty, a MaxMedium of 0 is dropped from the
// JSON output and decodes back as 0 (no medium findings allowed), not as
// unlimited — confirm consumers do not read a missing key as "unlimited".
MaxMedium int `json:"max_medium,omitempty"`
// MinScore is the minimum weighted score required.
MinScore float64 `json:"min_score"`
}
PassCriteria defines the requirements for approval.
func DefaultPassCriteria ¶
func DefaultPassCriteria() PassCriteria
DefaultPassCriteria returns standard pass criteria. Zero Critical/High, minimum score 7.0.
func StrictPassCriteria ¶
func StrictPassCriteria() PassCriteria
StrictPassCriteria returns strict pass criteria. Zero Critical/High, max 3 Medium, minimum score 8.0.
type ReportMetadata ¶
// ReportMetadata identifies the evaluated document and records when, and by
// what or whom, the report was produced.
type ReportMetadata struct {
// Document is the filename or path being evaluated.
Document string `json:"document"`
// DocumentID is the document identifier (e.g., PRD ID); omitted from JSON when empty.
DocumentID string `json:"document_id,omitempty"`
// DocumentTitle is the document title; omitted from JSON when empty.
DocumentTitle string `json:"document_title,omitempty"`
// DocumentVersion is the document version; omitted from JSON when empty.
DocumentVersion string `json:"document_version,omitempty"`
// GeneratedAt is when the report was created.
GeneratedAt time.Time `json:"generated_at"`
// GeneratedBy identifies what created this report; omitted from JSON when empty.
GeneratedBy string `json:"generated_by,omitempty"`
// ReviewerID identifies the reviewer (agent or human); omitted from JSON when empty.
ReviewerID string `json:"reviewer_id,omitempty"`
}
ReportMetadata contains report identification and audit information.
type ScoreStatus ¶
// ScoreStatus represents the pass/warn/fail status for a category score.
//
// It is a string type, so values serialize as the literal strings listed in
// the constant group below.
type ScoreStatus string

// Category score statuses. Each constant sits on its own line so that its
// trailing comment cannot swallow the declarations that follow it.
//
// NOTE(review): CategoryPending and CategoryNeedsImprovement do not carry the
// ScoreStatus prefix used by the other values; the names are left unchanged
// because renaming exported constants would break callers.
const (
	ScoreStatusPass          ScoreStatus = "pass"              // Score >= 7.0
	ScoreStatusWarn          ScoreStatus = "warn"              // Score >= 5.0 && < 7.0
	ScoreStatusFail          ScoreStatus = "fail"              // Score < 5.0
	CategoryPending          ScoreStatus = "pending"           // Not yet evaluated
	CategoryNeedsImprovement ScoreStatus = "needs_improvement" // Requires attention
)
func (ScoreStatus) Icon ¶
func (s ScoreStatus) Icon() string
Icon returns the emoji icon for the score status.
type Severity ¶
// Severity is the string-typed severity level assigned to a finding.
type Severity string
Severity represents the severity level of a finding. The levels are based on InfoSec severity classifications.
func AllSeverities ¶
func AllSeverities() []Severity
AllSeverities returns all severity levels in order of severity.
func (Severity) IsBlocking ¶
IsBlocking returns true if this severity blocks approval.