rubric

package

Version: v0.11.0 (latest) | Published: Jul 20, 2026 | License: MIT | Imports: 6 | Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/plexusone/structured-evaluation

Links

Open Source Insights

Documentation ¶

Overview ¶

Package rubric provides types for rubric-based evaluation reports that combine categorical scoring with severity-based findings. It is suited to LLM-as-Judge style reviews, such as PRD and ARB evaluations.

Index ¶

Constants
Variables
func AllReasonCodeCategories() []string
func AllRequiredPassing(results []CategoryResult, rubric *RubricSet) bool
func CountFindingsByCode(findings []Finding) map[ReasonCode]int
func GetRepairPrompt(code ReasonCode) string
func RequiresHumanReview(code ReasonCode) bool
func ValidDecisionStatusValues() []string
func ValidEvaluationTypeValues() []string
func ValidIntegerScoreValues() []int
func ValidReasonCodes() []string
func ValidScaleTypeValues() []string
func ValidScoreValues() []string
func ValidSeverityValues() []string
type ActionItem
type AggregationMethod
type Annotation
type CategoricalAgreement
- func ComputeCategoricalAgreement(results1, results2 []CategoryResult) *CategoricalAgreement
type Category
- func NewCategory(id, name, description string) *Category
- func (c *Category) AddOption(value, label string, criteria ...string) *Category
- func (c *Category) GetOptionForValue(value string) *ScaleOption
- func (c *Category) IsComposite() bool
- func (c *Category) SetEvaluationPrompt(prompt string) *Category
- func (c *Category) SetExamples(examples *CategoryExamples) *Category
- func (c *Category) SetRequired(required bool) *Category
- func (c *Category) SetWeight(weight float64) *Category
- func (c *Category) WithBinary(passCriteria, failCriteria []string) *Category
- func (c *Category) WithChecklist(required, optional []string, threshold *ChecklistThreshold) *Category
- func (c *Category) WithLikert(config *LikertConfig) *Category
- func (c *Category) WithLikert5(anchors []LikertAnchor) *Category
- func (c *Category) WithPassPartialFail(passCriteria, partialCriteria, failCriteria []string) *Category
type CategoryExamples
type CategoryResult
- func NewCategoryResult(category string, score ScoreValue, reasoning string) *CategoryResult
- func NewCategoryResultFromLikert(category string, likertScore int, config *LikertConfig, reasoning string) *CategoryResult
- func NewCategoryResultWithIntScore(category string, intScore IntegerScore, confidence float64, reasoning string) *CategoryResult
- func NewCategoryResultWithNumeric(category string, score ScoreValue, numericScore float64, reasoning string) *CategoryResult
- func (cr *CategoryResult) AddEvidence(evidence ...string) *CategoryResult
- func (cr *CategoryResult) AddFinding(f Finding) *CategoryResult
- func (cr *CategoryResult) AddReasonCode(code ReasonCode) *CategoryResult
- func (cr *CategoryResult) AddReasonCodes(codes ...ReasonCode) *CategoryResult
- func (cr *CategoryResult) GetNumericScore() float64
- func (cr *CategoryResult) HasLowConfidence(threshold ...float64) bool
- func (cr *CategoryResult) HasNumericScore() bool
- func (cr *CategoryResult) IsPassing() bool
- func (cr *CategoryResult) SetChecklistResults(results *ChecklistResults) *CategoryResult
- func (cr *CategoryResult) SetConfidence(confidence float64) *CategoryResult
- func (cr *CategoryResult) SetIntScore(score IntegerScore) *CategoryResult
- func (cr *CategoryResult) SetNumericScore(score float64) *CategoryResult
type CategoryResultCounts
- func CountResults(results []CategoryResult) CategoryResultCounts
- func (c CategoryResultCounts) AllPassing() bool
type ChecklistResults
type ChecklistThreshold
type CoverageReport
- func NewCoverageReport() *CoverageReport
- func (cr *CoverageReport) AddSection(name string, section CoverageSection)
- func (cr *CoverageReport) AllComplete() bool
- func (cr *CoverageReport) ComputeOverall() int
- func (cr *CoverageReport) ComputeOverallWeighted(weights map[string]float64) int
- func (cr *CoverageReport) GetSection(name string) CoverageSection
- func (cr *CoverageReport) HasSection(name string) bool
- func (cr *CoverageReport) MeetsThreshold(threshold int) bool
- func (cr *CoverageReport) SectionsAboveThreshold(threshold int) []string
- func (cr *CoverageReport) SectionsBelowThreshold(threshold int) []string
- func (cr *CoverageReport) SetSection(name string, total, complete int, missing []string) *CoverageReport
type CoverageSection
type Criterion
type CriterionLevel
type Decision
- func EvaluateResults(results []CategoryResult, findings []Finding, criteria PassCriteria, ...) Decision
type DecisionStatus
type EvaluationType
type Example
type Finding
- func NewFinding(id, category string, severity Severity, title, description string) *Finding
- func NewFindingWithCode(id, category string, code ReasonCode, title, description string) *Finding
- func (f *Finding) GetCodeInfo() *ReasonCodeInfo
- func (f *Finding) GetRepairHint() string
- func (f *Finding) GetRepairPrompt() string
- func (f *Finding) IsBlocking() bool
- func (f *Finding) SetCode(code ReasonCode) *Finding
- func (f *Finding) SetEffort(effort string) *Finding
- func (f *Finding) SetEvidence(evidence string) *Finding
- func (f *Finding) SetLocation(location string) *Finding
- func (f *Finding) SetOwner(owner string) *Finding
- func (f *Finding) SetRecommendation(recommendation string) *Finding
type FindingCounts
- func CountFindings(findings []Finding) FindingCounts
- func (c FindingCounts) BlockingCount() int
- func (c FindingCounts) HasBlocking() bool
type FindingLimits
type IRRMetrics
- func ComputeIRR(pairs []RatingPair) *IRRMetrics
- func ComputeIRRFromResults(results1, results2 []CategoryResult) *IRRMetrics
type IntegerScore
- func AllIntegerScores() []IntegerScore
- func ParseIntegerScore(score int) IntegerScore
- func (s IntegerScore) Icon() string
- func (s IntegerScore) IsPassing() bool
- func (s IntegerScore) IsValid() bool
- func (s IntegerScore) String() string
- func (s IntegerScore) ToCategorical() ScoreValue
type JudgeCategoricalScore
type JudgeDisagreement
type JudgeMetadata
- func NewJudgeMetadata(model string) *JudgeMetadata
- func (j *JudgeMetadata) SetLatency(d time.Duration)
- func (j *JudgeMetadata) WithPrompt(template, version string) *JudgeMetadata
- func (j *JudgeMetadata) WithProvider(provider string) *JudgeMetadata
- func (j *JudgeMetadata) WithRubric(id, version string) *JudgeMetadata
- func (j *JudgeMetadata) WithTemperature(temp float64) *JudgeMetadata
- func (j *JudgeMetadata) WithTokenUsage(input, output int) *JudgeMetadata
- func (j *JudgeMetadata) WithTrace(traceID, spanID string) *JudgeMetadata
type LikertAnchor
- func StandardLikert5Anchors() []LikertAnchor
type LikertConfig
type MultiJudgeResult
- func AggregateEvaluations(evaluations []*Rubric, method AggregationMethod) *MultiJudgeResult
type NextSteps
type PairwiseCategoryScore
type PairwiseComparison
- func NewPairwiseComparison(input, outputA, outputB string) *PairwiseComparison
- func (p *PairwiseComparison) AddCategoryScore(category string, winner PairwiseWinner, reasoning string, margin float64)
- func (p *PairwiseComparison) SetWinner(winner PairwiseWinner, reasoning string, confidence float64)
- func (p *PairwiseComparison) SwappedComparison() *PairwiseComparison
type PairwiseResult
- func ComputePairwiseResult(comparisons []PairwiseComparison) *PairwiseResult
type PairwiseWinner
type PassCriteria
- func DefaultPassCriteria() PassCriteria
- func StrictPassCriteria() PassCriteria
type RatingPair
type ReasonCode
- func GetBlockingCodes(findings []Finding) []ReasonCode
- func GetReasonCodesByCategory(category string) []ReasonCode
- func GetReasonCodesBySpecType(specType string) []ReasonCode
- func NormalizeCode(code ReasonCode) ReasonCode
type ReasonCodeInfo
- func GetReasonCodeInfo(code ReasonCode) *ReasonCodeInfo
type ReferenceData
- func NewReferenceData(input, expectedOutput string) *ReferenceData
- func (r *ReferenceData) WithAnnotation(name string, score float64, annotatorID string) *ReferenceData
- func (r *ReferenceData) WithContext(ctx ...string) *ReferenceData
type ReferenceDataset
- func NewReferenceDataset(id, name string) *ReferenceDataset
- func (d *ReferenceDataset) AddItem(item ReferenceData)
- func (d *ReferenceDataset) GetByID(id string) *ReferenceData
type ReportMetadata
type Rubric
- func NewRubric(reviewType, document string) *Rubric
- func (r *Rubric) AddBlocking(code ReasonCode) *Rubric
- func (r *Rubric) AddCategoryResult(cr CategoryResult)
- func (r *Rubric) AddFinding(f Finding)
- func (r *Rubric) CollectBlockingCodes() []ReasonCode
- func (r *Rubric) ComputeOverallConfidence() float64
- func (r *Rubric) ComputeOverallIntScore(rubricSet *RubricSet) IntegerScore
- func (r *Rubric) Evaluate(rubricSet *RubricSet) Decision
- func (r *Rubric) Finalize(rubricSet *RubricSet, rerunCommand string)
- func (r *Rubric) GenerateNextSteps(rerunCommand string)
- func (r *Rubric) GenerateSummary() string
- func (r *Rubric) GetCategoryResult(categoryID string) *CategoryResult
- func (r *Rubric) GetCoverage() *CoverageReport
- func (r *Rubric) GetExtension(key string) any
- func (r *Rubric) HasExtension(key string) bool
- func (r *Rubric) HasLowConfidence(threshold ...float64) bool
- func (r *Rubric) IsV2() bool
- func (r *Rubric) NeedsHumanReview(confidenceThreshold ...float64) bool
- func (r *Rubric) SetBlocking(codes []ReasonCode) *Rubric
- func (r *Rubric) SetConfidence(confidence float64) *Rubric
- func (r *Rubric) SetCoverage(coverage *CoverageReport)
- func (r *Rubric) SetExtension(key string, value any)
- func (r *Rubric) SetIntScore(score IntegerScore) *Rubric
- func (r *Rubric) SetJudge(judge *JudgeMetadata)
- func (r *Rubric) SetPass(pass bool) *Rubric
- func (r *Rubric) SetPassCriteria(criteria PassCriteria)
- func (r *Rubric) SetReference(ref *ReferenceData)
- func (r *Rubric) SetRubricInfo(rubricID, rubricVersion string)
type RubricMetadata
type RubricPassCriteria
type RubricSet
- func NewRubricSet(id, name, version string) *RubricSet
- func (rs *RubricSet) AddCategory(cat Category) *RubricSet
- func (rs *RubricSet) GetCategory(id string) *Category
- func (rs *RubricSet) GetRequiredCategories() []Category
- func (rs *RubricSet) SetJudgePrompt(template string) *RubricSet
- func (rs *RubricSet) SetMetadata(meta *RubricMetadata) *RubricSet
- func (rs *RubricSet) SetPassCriteria(criteria RubricPassCriteria) *RubricSet
- func (rs *RubricSet) ToJSON() ([]byte, error)
- func (rs *RubricSet) Validate() []string
type Scale
type ScaleOption
type ScaleType
type ScoreThresholds
type ScoreValue
- func LikertToCategorical(score int, config *LikertConfig) ScoreValue
- func (s ScoreValue) Icon() string
- func (s ScoreValue) IsFailing() bool
- func (s ScoreValue) IsPartial() bool
- func (s ScoreValue) IsPassing() bool
type Severity
- func AllSeverities() []Severity
- func WorstSeverity(findings []Finding) Severity
- func (s Severity) Icon() string
- func (s Severity) IsBlocking() bool
- func (s Severity) Weight() int
type TokenUsage
type ValidationIssue
type ValidationResult
- func ValidateReport(r *Rubric) *ValidationResult
- func ValidateRubricSetV2(rs *RubricSet) *ValidationResult
- func (r *ValidationResult) HasErrors() bool
- func (r *ValidationResult) HasWarnings() bool
- func (r *ValidationResult) String() string
type ValidationSeverity

Constants ¶

View Source

// Reason code category identifiers. The package documentation describes these
// as category prefixes; each ReasonCodeInfo entry in ReasonCodeRegistry records
// one of them in its Category field.
const (
	CategoryREQ    = "REQ"    // Requirements
	CategoryMETRIC = "METRIC" // Metrics and measurements
	CategoryUSER   = "USER"   // User personas and journeys
	CategoryARCH   = "ARCH"   // Architecture and technical design
	CategorySEC    = "SEC"    // Security
	CategorySCALE  = "SCALE"  // Scalability and performance
	CategoryINFRA  = "INFRA"  // Infrastructure and operations
	CategoryDOC    = "DOC"    // Documentation
	CategorySCOPE  = "SCOPE"  // Scope and constraints
	CategoryUX     = "UX"     // UX and accessibility
)

Reason code categories. Each value is the prefix that identifies the category of a reason code.

View Source

// ExtensionKeyCoverage is the standard extension key for coverage data.
// NOTE(review): presumably the key under which a CoverageReport is stored in a
// Rubric's extensions (see SetCoverage/GetCoverage) — confirm against those methods.
const ExtensionKeyCoverage = "coverage"

ExtensionKeyCoverage is the standard extension key for coverage data.

View Source

// SchemaVersionV2 is the current schema version identifier.
// NOTE(review): presumably the value Rubric.IsV2 compares against — confirm.
const SchemaVersionV2 = "v2"

SchemaVersionV2 is the current schema version.

Variables ¶

View Source

// LegacyCodeMapping maps legacy reason-code names (the string keys, implicitly
// typed as ReasonCode) to the current category-prefixed Code* constants. It
// exists for backwards compatibility with reports that still use the old names.
//
// Entries are listed roughly by target category (REQ, METRIC, USER, ARCH, SEC,
// SCALE, INFRA, DOC, SCOPE); note that ACCESSIBILITY_GAP maps to a UX code and
// sits between the SCOPE entries.
//
// NOTE(review): presumably consulted by NormalizeCode — confirm against its
// implementation. This is an exported, mutable package-level map; callers
// should treat it as read-only.
var LegacyCodeMapping = map[ReasonCode]ReasonCode{
	"AMBIGUOUS_REQUIREMENT":       CodeREQAmbiguous,
	"MISSING_ACCEPTANCE_CRITERIA": CodeREQNoCriteria,
	"CONFLICTING_REQUIREMENTS":    CodeREQConflict,
	"INCOMPLETE_REQUIREMENT":      CodeREQIncomplete,
	"UNTESTABLE_REQUIREMENT":      CodeREQUntestable,
	"UNMEASURABLE_SUCCESS_METRIC": CodeMETRICUnmeasurable,
	"MISSING_METRIC_BASELINE":     CodeMETRICNoBaseline,
	"MISSING_METRIC_TARGET":       CodeMETRICNoTarget,
	"UNREALISTIC_TARGET":          CodeMETRICUnrealistic,
	"MISSING_TRACKING_PLAN":       CodeMETRICNoTracking,
	"MISSING_USER_PERSONA":        CodeUSERNoPersona,
	"INCOMPLETE_PERSONA":          CodeUSERIncomplete,
	"MISSING_USER_JOURNEY":        CodeUSERNoJourney,
	"UNCLEAR_PROBLEM_STATEMENT":   CodeUSERUnclearProblem,
	"INCOMPLETE_ERROR_HANDLING":   CodeARCHNoErrorHandling,
	"MISSING_API_CONTRACT":        CodeARCHNoAPI,
	"MISSING_DATA_MODEL":          CodeARCHNoDataModel,
	"MISSING_DEPENDENCY":          CodeARCHMissingDep,
	"ARCHITECTURE_GAP":            CodeARCHGap,
	"SECURITY_GAP":                CodeSECGap,
	"MISSING_AUTHENTICATION":      CodeSECNoAuth,
	"MISSING_AUTHORIZATION":       CodeSECNoAuthz,
	"DATA_PRIVACY_CONCERN":        CodeSECPrivacy,
	"MISSING_INPUT_VALIDATION":    CodeSECNoValidation,
	"SCALABILITY_CONCERN":         CodeSCALEConcern,
	"PERFORMANCE_RISK":            CodeSCALEPerformance,
	"MISSING_CAPACITY_PLAN":       CodeSCALENoCapacity,
	"SINGLE_POINT_OF_FAILURE":     CodeSCALESPOF,
	"MISSING_DEPLOYMENT_PLAN":     CodeINFRANoDeploy,
	"MISSING_MONITORING":          CodeINFRANoMonitor,
	"MISSING_ALERTING_STRATEGY":   CodeINFRANoAlert,
	"MISSING_RECOVERY_PLAN":       CodeINFRANoRecovery,
	"INSUFFICIENT_DOCUMENTATION":  CodeDOCInsufficient,
	"OUTDATED_REFERENCE":          CodeDOCOutdated,
	"MISSING_DIAGRAM":             CodeDOCNoDiagram,
	"SCOPE_CREEP":                 CodeSCOPECreep,
	"UNBOUNDED_SCOPE":             CodeSCOPEUnbounded,
	"MISSING_CONSTRAINTS":         CodeSCOPENoConstr,
	"ACCESSIBILITY_GAP":           CodeUXNoARIA,
	"MISSING_TIMELINE":            CodeSCOPENoTimeline,
}

LegacyCodeMapping maps old reason code names to their new, category-prefixed codes. It is kept for backwards compatibility.

View Source

var ReasonCodeRegistry = map[ReasonCode]ReasonCodeInfo{

	CodeREQAmbiguous: {
		Code:            CodeREQAmbiguous,
		Category:        CategoryREQ,
		Description:     "Requirement lacks specificity or has multiple interpretations",
		DefaultSeverity: SeverityMedium,
		RepairPrompt:    "Rewrite this requirement to be specific and unambiguous. Include: (1) a clear action verb, (2) measurable outcome, (3) specific constraints or boundaries. Remove any vague terms like 'fast', 'user-friendly', 'easy'.",
		RequiresHuman:   false,
		SpecTypes:       []string{"prd", "mrd", "trd"},
	},
	CodeREQNoCriteria: {
		Code:            CodeREQNoCriteria,
		Category:        CategoryREQ,
		Description:     "Requirement lacks acceptance criteria for verification",
		DefaultSeverity: SeverityHigh,
		RepairPrompt:    "Add acceptance criteria using Given/When/Then format. Include: (1) preconditions (Given), (2) action trigger (When), (3) expected outcome (Then). Cover both success and failure scenarios.",
		RequiresHuman:   false,
		SpecTypes:       []string{"prd"},
	},
	CodeREQConflict: {
		Code:            CodeREQConflict,
		Category:        CategoryREQ,
		Description:     "Two or more requirements contradict each other",
		DefaultSeverity: SeverityCritical,
		RepairPrompt:    "Identify the conflicting requirements and resolve by: (1) clarifying which takes priority, (2) merging into a consistent requirement, or (3) adding conditional logic to handle both cases. Document the resolution rationale.",
		RequiresHuman:   true,
		SpecTypes:       []string{"prd", "mrd", "trd"},
	},
	CodeREQIncomplete: {
		Code:            CodeREQIncomplete,
		Category:        CategoryREQ,
		Description:     "Requirement is missing essential details",
		DefaultSeverity: SeverityMedium,
		RepairPrompt:    "Complete this requirement by adding missing elements: (1) WHO is affected, (2) WHAT action/capability, (3) WHY it's needed (business value), (4) any constraints or dependencies.",
		RequiresHuman:   false,
		SpecTypes:       []string{"prd", "mrd"},
	},
	CodeREQUntestable: {
		Code:            CodeREQUntestable,
		Category:        CategoryREQ,
		Description:     "Requirement cannot be objectively verified",
		DefaultSeverity: SeverityMedium,
		RepairPrompt:    "Rewrite this requirement to be testable. Add: (1) specific numeric thresholds where applicable, (2) clear pass/fail conditions, (3) measurement method. Replace subjective terms with objective criteria.",
		RequiresHuman:   false,
		SpecTypes:       []string{"prd"},
	},
	CodeREQMissingReason: {
		Code:            CodeREQMissingReason,
		Category:        CategoryREQ,
		Description:     "Requirement lacks justification or business rationale",
		DefaultSeverity: SeverityLow,
		RepairPrompt:    "Add a rationale explaining WHY this requirement exists. Include: (1) business value or user benefit, (2) problem it solves, (3) consequences of not implementing it.",
		RequiresHuman:   false,
		SpecTypes:       []string{"prd", "mrd"},
	},

	CodeMETRICUnmeasurable: {
		Code:            CodeMETRICUnmeasurable,
		Category:        CategoryMETRIC,
		Description:     "Success metric cannot be objectively measured",
		DefaultSeverity: SeverityMedium,
		RepairPrompt:    "Make this metric measurable by specifying: (1) exact data source, (2) calculation formula, (3) measurement frequency, (4) responsible team/system for tracking.",
		RequiresHuman:   false,
		SpecTypes:       []string{"prd", "mrd"},
	},
	CodeMETRICNoBaseline: {
		Code:            CodeMETRICNoBaseline,
		Category:        CategoryMETRIC,
		Description:     "Success metric lacks a baseline value",
		DefaultSeverity: SeverityLow,
		RepairPrompt:    "Add baseline value for this metric. Include: (1) current value or 'TBD - to be measured in sprint 1', (2) date of measurement, (3) methodology used to obtain baseline.",
		RequiresHuman:   false,
		SpecTypes:       []string{"prd", "mrd"},
	},
	CodeMETRICNoTarget: {
		Code:            CodeMETRICNoTarget,
		Category:        CategoryMETRIC,
		Description:     "Success metric lacks a target value",
		DefaultSeverity: SeverityMedium,
		RepairPrompt:    "Add a specific target value with: (1) numeric goal, (2) timeframe to achieve it, (3) rationale for why this target was chosen.",
		RequiresHuman:   false,
		SpecTypes:       []string{"prd", "mrd"},
	},
	CodeMETRICUnrealistic: {
		Code:            CodeMETRICUnrealistic,
		Category:        CategoryMETRIC,
		Description:     "Target appears unrealistic given constraints",
		DefaultSeverity: SeverityMedium,
		RepairPrompt:    "Revise this target to be achievable. Either: (1) provide evidence/benchmarks supporting the target, (2) adjust to a realistic value with justification, or (3) break into incremental milestones.",
		RequiresHuman:   true,
		SpecTypes:       []string{"prd", "mrd"},
	},
	CodeMETRICNoTracking: {
		Code:            CodeMETRICNoTracking,
		Category:        CategoryMETRIC,
		Description:     "No plan for how metrics will be tracked",
		DefaultSeverity: SeverityLow,
		RepairPrompt:    "Add tracking plan specifying: (1) tool/system for measurement, (2) reporting frequency, (3) dashboard or alert thresholds, (4) who reviews the metrics.",
		RequiresHuman:   false,
		SpecTypes:       []string{"prd"},
	},
	CodeMETRICMissingKPI: {
		Code:            CodeMETRICMissingKPI,
		Category:        CategoryMETRIC,
		Description:     "Key performance indicator not defined for critical feature",
		DefaultSeverity: SeverityMedium,
		RepairPrompt:    "Add KPI for this feature covering: (1) primary success metric, (2) leading indicators, (3) guardrail metrics to prevent negative side effects.",
		RequiresHuman:   false,
		SpecTypes:       []string{"prd", "mrd"},
	},
	CodeMETRICVanity: {
		Code:            CodeMETRICVanity,
		Category:        CategoryMETRIC,
		Description:     "Metric does not correlate with actual business value",
		DefaultSeverity: SeverityLow,
		RepairPrompt:    "Replace this vanity metric with an actionable metric that: (1) correlates with business outcomes, (2) can influence decisions, (3) measures user value not just activity.",
		RequiresHuman:   true,
		SpecTypes:       []string{"prd", "mrd"},
	},

	CodeUSERNoPersona: {
		Code:            CodeUSERNoPersona,
		Category:        CategoryUSER,
		Description:     "Target user persona not defined",
		DefaultSeverity: SeverityHigh,
		RepairPrompt:    "Add user persona including: (1) name and role, (2) demographics/context, (3) goals and motivations, (4) pain points and frustrations, (5) technical proficiency level.",
		RequiresHuman:   false,
		SpecTypes:       []string{"prd", "mrd", "uxd"},
	},
	CodeUSERIncomplete: {
		Code:            CodeUSERIncomplete,
		Category:        CategoryUSER,
		Description:     "User persona lacks essential details",
		DefaultSeverity: SeverityMedium,
		RepairPrompt:    "Complete this persona by adding missing elements: goals, pain points, behavioral characteristics, technical context, or typical use scenarios.",
		RequiresHuman:   false,
		SpecTypes:       []string{"prd", "uxd"},
	},
	CodeUSERNoJourney: {
		Code:            CodeUSERNoJourney,
		Category:        CategoryUSER,
		Description:     "User journey or flow not documented",
		DefaultSeverity: SeverityMedium,
		RepairPrompt:    "Add user journey showing: (1) entry point/trigger, (2) step-by-step actions, (3) decision points, (4) success outcome, (5) potential failure points and recovery.",
		RequiresHuman:   false,
		SpecTypes:       []string{"prd", "uxd"},
	},
	CodeUSERUnclearProblem: {
		Code:            CodeUSERUnclearProblem,
		Category:        CategoryUSER,
		Description:     "Problem statement is vague or unclear",
		DefaultSeverity: SeverityHigh,
		RepairPrompt:    "Clarify the problem statement using this format: '[User persona] needs [capability] because [reason], but currently [obstacle/pain point].' Be specific about the impact.",
		RequiresHuman:   false,
		SpecTypes:       []string{"prd", "mrd"},
	},
	CodeUSERNoGoals: {
		Code:            CodeUSERNoGoals,
		Category:        CategoryUSER,
		Description:     "User goals not articulated",
		DefaultSeverity: SeverityMedium,
		RepairPrompt:    "Add user goals distinguishing between: (1) functional goals (tasks to complete), (2) emotional goals (how they want to feel), (3) social goals (how they want to be perceived).",
		RequiresHuman:   false,
		SpecTypes:       []string{"prd", "uxd"},
	},
	CodeUSERNoPainPoints: {
		Code:            CodeUSERNoPainPoints,
		Category:        CategoryUSER,
		Description:     "User pain points not documented",
		DefaultSeverity: SeverityMedium,
		RepairPrompt:    "Document pain points including: (1) current workarounds users employ, (2) time/money cost of the problem, (3) emotional frustration, (4) frequency of occurrence.",
		RequiresHuman:   false,
		SpecTypes:       []string{"prd", "mrd", "uxd"},
	},

	CodeARCHNoErrorHandling: {
		Code:            CodeARCHNoErrorHandling,
		Category:        CategoryARCH,
		Description:     "Error handling strategy is incomplete",
		DefaultSeverity: SeverityMedium,
		RepairPrompt:    "Add error handling covering: (1) error categories (network, validation, auth, server), (2) retry strategy per category, (3) user-facing error messages, (4) logging/alerting approach, (5) graceful degradation.",
		RequiresHuman:   false,
		SpecTypes:       []string{"trd", "ird"},
	},
	CodeARCHNoAPI: {
		Code:            CodeARCHNoAPI,
		Category:        CategoryARCH,
		Description:     "API contract or interface not specified",
		DefaultSeverity: SeverityHigh,
		RepairPrompt:    "Add API specification including: (1) endpoint paths and methods, (2) request/response schemas with examples, (3) authentication requirements, (4) rate limits, (5) error response format, (6) versioning strategy.",
		RequiresHuman:   false,
		SpecTypes:       []string{"trd"},
	},
	CodeARCHNoDataModel: {
		Code:            CodeARCHNoDataModel,
		Category:        CategoryARCH,
		Description:     "Data model or schema not defined",
		DefaultSeverity: SeverityHigh,
		RepairPrompt:    "Add data model with: (1) entity definitions and fields, (2) relationships between entities, (3) data types and constraints, (4) indexes for query patterns, (5) migration strategy for schema changes.",
		RequiresHuman:   false,
		SpecTypes:       []string{"trd"},
	},
	CodeARCHMissingDep: {
		Code:            CodeARCHMissingDep,
		Category:        CategoryARCH,
		Description:     "Required dependency not documented",
		DefaultSeverity: SeverityMedium,
		RepairPrompt:    "Document this dependency including: (1) name and version, (2) why it's needed, (3) license compatibility, (4) maintenance status, (5) fallback if deprecated.",
		RequiresHuman:   false,
		SpecTypes:       []string{"trd", "ird"},
	},
	CodeARCHGap: {
		Code:            CodeARCHGap,
		Category:        CategoryARCH,
		Description:     "Architecture has unexplained gaps or missing components",
		DefaultSeverity: SeverityHigh,
		RepairPrompt:    "Address the architectural gap by: (1) adding the missing component with rationale, (2) explaining why it's intentionally omitted, or (3) marking as future work with timeline.",
		RequiresHuman:   true,
		SpecTypes:       []string{"trd"},
	},
	CodeARCHNoInterface: {
		Code:            CodeARCHNoInterface,
		Category:        CategoryARCH,
		Description:     "Interface between components not defined",
		DefaultSeverity: SeverityMedium,
		RepairPrompt:    "Define the interface including: (1) method signatures, (2) input/output types, (3) error conditions, (4) idempotency guarantees, (5) versioning approach.",
		RequiresHuman:   false,
		SpecTypes:       []string{"trd"},
	},
	CodeARCHCircularDep: {
		Code:            CodeARCHCircularDep,
		Category:        CategoryARCH,
		Description:     "Circular dependency detected between components",
		DefaultSeverity: SeverityHigh,
		RepairPrompt:    "Resolve circular dependency by: (1) extracting shared logic to a new module, (2) using dependency injection, (3) introducing an interface/abstraction layer, or (4) restructuring the component boundaries.",
		RequiresHuman:   true,
		SpecTypes:       []string{"trd"},
	},
	CodeARCHTightCoupling: {
		Code:            CodeARCHTightCoupling,
		Category:        CategoryARCH,
		Description:     "Components are too tightly coupled",
		DefaultSeverity: SeverityMedium,
		RepairPrompt:    "Reduce coupling by: (1) defining clear interfaces, (2) using events/messages instead of direct calls, (3) applying dependency inversion, (4) documenting the bounded context.",
		RequiresHuman:   true,
		SpecTypes:       []string{"trd"},
	},

	CodeSECGap: {
		Code:            CodeSECGap,
		Category:        CategorySEC,
		Description:     "Security consideration not addressed",
		DefaultSeverity: SeverityHigh,
		RepairPrompt:    "Add security controls addressing: (1) threat model for this feature, (2) mitigation strategies, (3) security testing requirements, (4) incident response considerations.",
		RequiresHuman:   true,
		SpecTypes:       []string{"trd", "ird", "prd"},
	},
	CodeSECNoAuth: {
		Code:            CodeSECNoAuth,
		Category:        CategorySEC,
		Description:     "Authentication mechanism not specified",
		DefaultSeverity: SeverityCritical,
		RepairPrompt:    "Define authentication including: (1) auth method (OAuth, JWT, API key, etc.), (2) token/session management, (3) password/credential requirements, (4) MFA considerations, (5) session timeout policy.",
		RequiresHuman:   true,
		SpecTypes:       []string{"trd"},
	},
	CodeSECNoAuthz: {
		Code:            CodeSECNoAuthz,
		Category:        CategorySEC,
		Description:     "Authorization model not defined",
		DefaultSeverity: SeverityHigh,
		RepairPrompt:    "Add authorization covering: (1) role definitions, (2) permission matrix, (3) resource-level access rules, (4) principle of least privilege application, (5) authorization check points.",
		RequiresHuman:   true,
		SpecTypes:       []string{"trd"},
	},
	CodeSECPrivacy: {
		Code:            CodeSECPrivacy,
		Category:        CategorySEC,
		Description:     "Data privacy requirements not addressed",
		DefaultSeverity: SeverityHigh,
		RepairPrompt:    "Document data privacy including: (1) PII identification, (2) data classification, (3) retention policy, (4) deletion/anonymization procedures, (5) compliance requirements (GDPR, CCPA, etc.).",
		RequiresHuman:   true,
		SpecTypes:       []string{"trd", "prd"},
	},
	CodeSECNoValidation: {
		Code:            CodeSECNoValidation,
		Category:        CategorySEC,
		Description:     "Input validation not specified",
		DefaultSeverity: SeverityMedium,
		RepairPrompt:    "Add input validation covering: (1) allowed characters/formats, (2) length limits, (3) sanitization rules, (4) validation error messages, (5) server-side validation (never trust client).",
		RequiresHuman:   false,
		SpecTypes:       []string{"trd"},
	},
	CodeSECNoEncryption: {
		Code:            CodeSECNoEncryption,
		Category:        CategorySEC,
		Description:     "Encryption requirements not specified",
		DefaultSeverity: SeverityHigh,
		RepairPrompt:    "Define encryption requirements: (1) data at rest encryption, (2) data in transit (TLS version), (3) key management strategy, (4) sensitive field encryption.",
		RequiresHuman:   true,
		SpecTypes:       []string{"trd", "ird"},
	},
	CodeSECHardcodedSecret: {
		Code:            CodeSECHardcodedSecret,
		Category:        CategorySEC,
		Description:     "Hardcoded secrets or credentials detected",
		DefaultSeverity: SeverityCritical,
		RepairPrompt:    "Remove hardcoded secrets and specify: (1) secret management system to use, (2) environment variable naming convention, (3) rotation policy, (4) access audit logging.",
		RequiresHuman:   true,
		SpecTypes:       []string{"trd", "ird"},
	},
	CodeSECInjectionRisk: {
		Code:            CodeSECInjectionRisk,
		Category:        CategorySEC,
		Description:     "Potential injection vulnerability (SQL, XSS, command)",
		DefaultSeverity: SeverityCritical,
		RepairPrompt:    "Address injection risk by specifying: (1) parameterized queries (SQL), (2) output encoding (XSS), (3) input sanitization, (4) CSP headers, (5) security testing requirements.",
		RequiresHuman:   true,
		SpecTypes:       []string{"trd"},
	},

	CodeSCALEConcern: {
		Code:            CodeSCALEConcern,
		Category:        CategorySCALE,
		Description:     "Scalability concern not addressed",
		DefaultSeverity: SeverityMedium,
		RepairPrompt:    "Add scaling strategy covering: (1) expected load (requests/sec, concurrent users), (2) horizontal vs vertical scaling approach, (3) bottleneck identification, (4) auto-scaling triggers.",
		RequiresHuman:   false,
		SpecTypes:       []string{"trd", "ird"},
	},
	CodeSCALEPerformance: {
		Code:            CodeSCALEPerformance,
		Category:        CategorySCALE,
		Description:     "Performance risk identified but not mitigated",
		DefaultSeverity: SeverityMedium,
		RepairPrompt:    "Add performance requirements: (1) latency targets (p50, p95, p99), (2) throughput requirements, (3) resource limits (CPU, memory), (4) performance testing approach.",
		RequiresHuman:   false,
		SpecTypes:       []string{"trd", "ird"},
	},
	CodeSCALENoCapacity: {
		Code:            CodeSCALENoCapacity,
		Category:        CategorySCALE,
		Description:     "Capacity planning not documented",
		DefaultSeverity: SeverityLow,
		RepairPrompt:    "Add capacity plan including: (1) initial resource sizing, (2) growth projections (6mo, 1yr), (3) cost estimates, (4) scaling thresholds.",
		RequiresHuman:   false,
		SpecTypes:       []string{"ird"},
	},
	CodeSCALESPOF: {
		Code:            CodeSCALESPOF,
		Category:        CategorySCALE,
		Description:     "Single point of failure identified",
		DefaultSeverity: SeverityHigh,
		RepairPrompt:    "Eliminate SPOF by adding: (1) redundancy strategy (active-active, active-passive), (2) failover mechanism, (3) health check configuration, (4) recovery time objective.",
		RequiresHuman:   true,
		SpecTypes:       []string{"trd", "ird"},
	},
	CodeSCALENoRateLimit: {
		Code:            CodeSCALENoRateLimit,
		Category:        CategorySCALE,
		Description:     "Rate limiting not specified",
		DefaultSeverity: SeverityMedium,
		RepairPrompt:    "Add rate limiting covering: (1) limits per user/IP/API key, (2) time windows, (3) response when exceeded (429 status), (4) bypass rules for internal services.",
		RequiresHuman:   false,
		SpecTypes:       []string{"trd"},
	},
	CodeSCALENoCache: {
		Code:            CodeSCALENoCache,
		Category:        CategorySCALE,
		Description:     "Caching strategy not defined for frequently accessed data",
		DefaultSeverity: SeverityLow,
		RepairPrompt:    "Add caching strategy: (1) what to cache, (2) cache location (CDN, Redis, in-memory), (3) TTL/expiration policy, (4) cache invalidation approach.",
		RequiresHuman:   false,
		SpecTypes:       []string{"trd"},
	},
	CodeSCALEBlockingOp: {
		Code:            CodeSCALEBlockingOp,
		Category:        CategorySCALE,
		Description:     "Blocking operation in critical path",
		DefaultSeverity: SeverityMedium,
		RepairPrompt:    "Address blocking operation by: (1) making it async with queue/callback, (2) adding timeout, (3) implementing circuit breaker, (4) providing fallback response.",
		RequiresHuman:   false,
		SpecTypes:       []string{"trd"},
	},

	CodeINFRANoDeploy: {
		Code:            CodeINFRANoDeploy,
		Category:        CategoryINFRA,
		Description:     "Deployment strategy not defined",
		DefaultSeverity: SeverityMedium,
		RepairPrompt:    "Add deployment plan covering: (1) deployment method (blue-green, canary, rolling), (2) environment progression (dev→staging→prod), (3) deployment automation, (4) smoke test requirements.",
		RequiresHuman:   false,
		SpecTypes:       []string{"ird"},
	},
	CodeINFRANoMonitor: {
		Code:            CodeINFRANoMonitor,
		Category:        CategoryINFRA,
		Description:     "Monitoring strategy not defined",
		DefaultSeverity: SeverityMedium,
		RepairPrompt:    "Add monitoring covering: (1) key metrics to track, (2) logging strategy and retention, (3) tracing/correlation IDs, (4) dashboard requirements, (5) SLO definitions.",
		RequiresHuman:   false,
		SpecTypes:       []string{"ird"},
	},
	CodeINFRANoAlert: {
		Code:            CodeINFRANoAlert,
		Category:        CategoryINFRA,
		Description:     "Alerting and on-call strategy not defined",
		DefaultSeverity: SeverityLow,
		RepairPrompt:    "Add alerting strategy: (1) alert thresholds and conditions, (2) severity levels, (3) notification channels, (4) escalation policy, (5) on-call rotation.",
		RequiresHuman:   false,
		SpecTypes:       []string{"ird"},
	},
	CodeINFRANoRecovery: {
		Code:            CodeINFRANoRecovery,
		Category:        CategoryINFRA,
		Description:     "Disaster recovery plan not documented",
		DefaultSeverity: SeverityMedium,
		RepairPrompt:    "Add DR plan including: (1) RTO target, (2) RPO target, (3) recovery procedures, (4) data backup strategy, (5) DR testing schedule.",
		RequiresHuman:   true,
		SpecTypes:       []string{"ird"},
	},
	CodeINFRANoBackup: {
		Code:            CodeINFRANoBackup,
		Category:        CategoryINFRA,
		Description:     "Backup strategy not specified",
		DefaultSeverity: SeverityMedium,
		RepairPrompt:    "Add backup strategy: (1) what is backed up, (2) backup frequency, (3) retention period, (4) backup location (offsite), (5) restore testing procedure.",
		RequiresHuman:   false,
		SpecTypes:       []string{"ird"},
	},
	CodeINFRANoRunbook: {
		Code:            CodeINFRANoRunbook,
		Category:        CategoryINFRA,
		Description:     "Operational runbook not provided",
		DefaultSeverity: SeverityLow,
		RepairPrompt:    "Create runbook covering: (1) common operational tasks, (2) troubleshooting steps for known issues, (3) escalation contacts, (4) maintenance procedures.",
		RequiresHuman:   false,
		SpecTypes:       []string{"ird"},
	},
	CodeINFRANoRollback: {
		Code:            CodeINFRANoRollback,
		Category:        CategoryINFRA,
		Description:     "Rollback procedure not defined",
		DefaultSeverity: SeverityMedium,
		RepairPrompt:    "Add rollback procedure: (1) trigger criteria for rollback, (2) step-by-step rollback process, (3) data migration rollback (if applicable), (4) verification steps post-rollback.",
		RequiresHuman:   false,
		SpecTypes:       []string{"ird"},
	},
	CodeINFRAEnvMismatch: {
		Code:            CodeINFRAEnvMismatch,
		Category:        CategoryINFRA,
		Description:     "Environment configuration mismatch risk",
		DefaultSeverity: SeverityMedium,
		RepairPrompt:    "Address environment parity: (1) document environment differences, (2) use infrastructure-as-code, (3) implement config validation, (4) add environment-specific testing.",
		RequiresHuman:   false,
		SpecTypes:       []string{"ird"},
	},

	CodeDOCInsufficient: {
		Code:            CodeDOCInsufficient,
		Category:        CategoryDOC,
		Description:     "Documentation is insufficient for implementation",
		DefaultSeverity: SeverityMedium,
		RepairPrompt:    "Expand documentation to include: (1) implementation details, (2) code examples, (3) edge cases and their handling, (4) integration points.",
		RequiresHuman:   false,
		SpecTypes:       []string{"prd", "trd", "ird", "uxd"},
	},
	CodeDOCOutdated: {
		Code:            CodeDOCOutdated,
		Category:        CategoryDOC,
		Description:     "Reference to outdated information or deprecated feature",
		DefaultSeverity: SeverityLow,
		RepairPrompt:    "Update the outdated reference to: (1) current version/approach, (2) migration path if applicable, (3) deprecation timeline if relevant.",
		RequiresHuman:   false,
		SpecTypes:       []string{"prd", "trd", "ird"},
	},
	CodeDOCNoDiagram: {
		Code:            CodeDOCNoDiagram,
		Category:        CategoryDOC,
		Description:     "Visual diagram would improve clarity",
		DefaultSeverity: SeverityLow,
		RepairPrompt:    "Add a diagram showing: (1) component relationships, (2) data flow, (3) sequence of operations, or (4) architecture overview. Use Mermaid or similar text-based diagram format.",
		RequiresHuman:   false,
		SpecTypes:       []string{"trd", "ird", "uxd"},
	},
	CodeDOCNoExamples: {
		Code:            CodeDOCNoExamples,
		Category:        CategoryDOC,
		Description:     "Missing examples to clarify usage",
		DefaultSeverity: SeverityLow,
		RepairPrompt:    "Add concrete examples showing: (1) typical usage, (2) edge cases, (3) error scenarios. Include request/response examples for APIs.",
		RequiresHuman:   false,
		SpecTypes:       []string{"prd", "trd"},
	},
	CodeDOCInconsistent: {
		Code:            CodeDOCInconsistent,
		Category:        CategoryDOC,
		Description:     "Documentation inconsistent with other sections",
		DefaultSeverity: SeverityMedium,
		RepairPrompt:    "Resolve the inconsistency by: (1) identifying the source of truth, (2) updating conflicting sections, (3) adding cross-references to avoid future drift.",
		RequiresHuman:   false,
		SpecTypes:       []string{"prd", "trd", "ird", "uxd"},
	},

	CodeSCOPECreep: {
		Code:            CodeSCOPECreep,
		Category:        CategorySCOPE,
		Description:     "Scope includes unnecessary features or complexity",
		DefaultSeverity: SeverityMedium,
		RepairPrompt:    "Address scope creep by: (1) moving non-essential items to 'Future Work', (2) justifying inclusion with business value, or (3) removing entirely with rationale.",
		RequiresHuman:   true,
		SpecTypes:       []string{"prd", "mrd"},
	},
	CodeSCOPEUnbounded: {
		Code:            CodeSCOPEUnbounded,
		Category:        CategorySCOPE,
		Description:     "Scope is unbounded or unclear",
		DefaultSeverity: SeverityHigh,
		RepairPrompt:    "Define scope boundaries: (1) explicit 'In Scope' list, (2) explicit 'Out of Scope' / 'Non-Goals' list, (3) decision criteria for borderline items.",
		RequiresHuman:   true,
		SpecTypes:       []string{"prd", "mrd"},
	},
	CodeSCOPENoConstr: {
		Code:            CodeSCOPENoConstr,
		Category:        CategorySCOPE,
		Description:     "Constraints or limitations not documented",
		DefaultSeverity: SeverityMedium,
		RepairPrompt:    "Document constraints including: (1) technical constraints (platform, language, etc.), (2) business constraints (budget, timeline), (3) resource constraints (team size, skills).",
		RequiresHuman:   false,
		SpecTypes:       []string{"prd", "trd"},
	},
	CodeSCOPENoNonGoals: {
		Code:            CodeSCOPENoNonGoals,
		Category:        CategorySCOPE,
		Description:     "Non-goals not explicitly stated",
		DefaultSeverity: SeverityLow,
		RepairPrompt:    "Add non-goals section listing: (1) features explicitly NOT being built, (2) use cases NOT being supported, (3) rationale for each exclusion.",
		RequiresHuman:   false,
		SpecTypes:       []string{"prd", "mrd"},
	},
	CodeSCOPEMVPUnclear: {
		Code:            CodeSCOPEMVPUnclear,
		Category:        CategorySCOPE,
		Description:     "MVP scope not clearly defined",
		DefaultSeverity: SeverityMedium,
		RepairPrompt:    "Define MVP clearly: (1) minimum feature set for launch, (2) success criteria for MVP, (3) what's deferred to post-MVP, (4) timeline for MVP.",
		RequiresHuman:   true,
		SpecTypes:       []string{"prd"},
	},
	CodeSCOPENoTimeline: {
		Code:            CodeSCOPENoTimeline,
		Category:        CategorySCOPE,
		Description:     "Timeline or milestones not specified",
		DefaultSeverity: SeverityLow,
		RepairPrompt:    "Add timeline with: (1) key milestones, (2) dependencies between milestones, (3) target dates (even if tentative), (4) critical path identification.",
		RequiresHuman:   true,
		SpecTypes:       []string{"prd", "mrd"},
	},

	CodeUXNoARIA: {
		Code:            CodeUXNoARIA,
		Category:        CategoryUX,
		Description:     "ARIA labels or accessibility attributes not specified",
		DefaultSeverity: SeverityHigh,
		RepairPrompt:    "Add accessibility requirements: (1) ARIA labels for interactive elements, (2) alt text requirements for images, (3) heading hierarchy, (4) focus management, (5) screen reader considerations.",
		RequiresHuman:   false,
		SpecTypes:       []string{"uxd"},
	},
	CodeUXNoErrorState: {
		Code:            CodeUXNoErrorState,
		Category:        CategoryUX,
		Description:     "Error state UI not designed",
		DefaultSeverity: SeverityHigh,
		RepairPrompt:    "Design error states including: (1) error message content and tone, (2) visual treatment, (3) recovery actions available to user, (4) retry mechanisms, (5) when to show inline vs page-level errors.",
		RequiresHuman:   false,
		SpecTypes:       []string{"uxd"},
	},
	CodeUXNoLoading: {
		Code:            CodeUXNoLoading,
		Category:        CategoryUX,
		Description:     "Loading state not specified",
		DefaultSeverity: SeverityMedium,
		RepairPrompt:    "Add loading state design: (1) loading indicator type (spinner, skeleton, progress), (2) placement, (3) timeout handling, (4) partial content display strategy.",
		RequiresHuman:   false,
		SpecTypes:       []string{"uxd"},
	},
	CodeUXNoEmpty: {
		Code:            CodeUXNoEmpty,
		Category:        CategoryUX,
		Description:     "Empty state not designed",
		DefaultSeverity: SeverityLow,
		RepairPrompt:    "Design empty state with: (1) friendly message explaining why empty, (2) illustration or icon, (3) call-to-action to populate, (4) help/onboarding content.",
		RequiresHuman:   false,
		SpecTypes:       []string{"uxd"},
	},
	CodeUXNoResponsive: {
		Code:            CodeUXNoResponsive,
		Category:        CategoryUX,
		Description:     "Responsive/mobile behavior not specified",
		DefaultSeverity: SeverityMedium,
		RepairPrompt:    "Add responsive design specs: (1) breakpoints, (2) layout changes per breakpoint, (3) touch target sizes, (4) mobile-specific interactions, (5) content prioritization for small screens.",
		RequiresHuman:   false,
		SpecTypes:       []string{"uxd"},
	},
	CodeUXNoKeyboard: {
		Code:            CodeUXNoKeyboard,
		Category:        CategoryUX,
		Description:     "Keyboard navigation not specified",
		DefaultSeverity: SeverityMedium,
		RepairPrompt:    "Add keyboard accessibility: (1) tab order, (2) keyboard shortcuts, (3) focus indicators, (4) skip links, (5) escape key behavior for modals.",
		RequiresHuman:   false,
		SpecTypes:       []string{"uxd"},
	},
	CodeUXIncompleteNav: {
		Code:            CodeUXIncompleteNav,
		Category:        CategoryUX,
		Description:     "Navigation flow incomplete",
		DefaultSeverity: SeverityMedium,
		RepairPrompt:    "Complete navigation by adding: (1) entry points to this screen, (2) exit points/next steps, (3) back navigation behavior, (4) breadcrumb requirements, (5) deep linking support.",
		RequiresHuman:   false,
		SpecTypes:       []string{"uxd"},
	},
	CodeUXNoFeedback: {
		Code:            CodeUXNoFeedback,
		Category:        CategoryUX,
		Description:     "User feedback mechanism not designed",
		DefaultSeverity: SeverityLow,
		RepairPrompt:    "Add user feedback design: (1) success confirmations, (2) progress indicators for long operations, (3) toast/notification placement, (4) undo capabilities.",
		RequiresHuman:   false,
		SpecTypes:       []string{"uxd"},
	},

	CodeOther: {
		Code:            CodeOther,
		Category:        "other",
		Description:     "Issue does not fit standard categories",
		DefaultSeverity: SeverityMedium,
		RepairPrompt:    "Review the specific issue and address based on context. Consider whether a new reason code category should be created for this type of issue.",
		RequiresHuman:   true,
		SpecTypes:       []string{},
	},
}

ReasonCodeRegistry maps codes to their metadata.

Functions ¶

func AllReasonCodeCategories ¶ added in v0.9.0

func AllReasonCodeCategories() []string

AllReasonCodeCategories returns all unique reason code categories.

func AllRequiredPassing ¶

func AllRequiredPassing(results []CategoryResult, rubric *RubricSet) bool

AllRequiredPassing checks if all required categories passed.

func CountFindingsByCode ¶ added in v0.9.0

func CountFindingsByCode(findings []Finding) map[ReasonCode]int

CountFindingsByCode counts findings by reason code.

func GetRepairPrompt ¶ added in v0.9.0

func GetRepairPrompt(code ReasonCode) string

GetRepairPrompt returns the AI repair prompt for a code.

func RequiresHumanReview ¶ added in v0.9.0

func RequiresHumanReview(code ReasonCode) bool

RequiresHumanReview returns true if the code needs human review after AI repair.

func ValidDecisionStatusValues ¶ added in v0.7.0

func ValidDecisionStatusValues() []string

ValidDecisionStatusValues returns all valid decision status values.

func ValidEvaluationTypeValues ¶ added in v0.7.0

func ValidEvaluationTypeValues() []string

ValidEvaluationTypeValues returns all valid evaluation type values.

func ValidIntegerScoreValues ¶ added in v0.9.0

func ValidIntegerScoreValues() []int

ValidIntegerScoreValues returns all valid integer score values (1-5).

func ValidReasonCodes ¶ added in v0.9.0

func ValidReasonCodes() []string

ValidReasonCodes returns all registered reason codes.

func ValidScaleTypeValues ¶ added in v0.7.0

func ValidScaleTypeValues() []string

ValidScaleTypeValues returns all valid scale type values.

func ValidScoreValues ¶ added in v0.7.0

func ValidScoreValues() []string

ValidScoreValues returns all valid score values.

func ValidSeverityValues ¶ added in v0.7.0

func ValidSeverityValues() []string

ValidSeverityValues returns all valid severity values.

Types ¶

type AggregationMethod ¶

type AggregationMethod string

AggregationMethod specifies how to combine multiple judge scores.

// Supported AggregationMethod values for combining multiple judge scores.
const (
	// AggregationMajority uses majority vote for pass/partial/fail.
	AggregationMajority AggregationMethod = "majority"

	// AggregationConservative uses the lowest/most critical score.
	AggregationConservative AggregationMethod = "conservative"

	// AggregationOptimistic uses the highest/most lenient score.
	AggregationOptimistic AggregationMethod = "optimistic"

	// AggregationUnanimous requires all judges to agree.
	// NOTE(review): the result when judges disagree is not visible here — confirm.
	AggregationUnanimous AggregationMethod = "unanimous"
)

type Annotation ¶

// Annotation is a named label and/or score recorded by an annotator, which
// may be a human or an automated source (see AnnotatorType).
//
// NOTE(review): the JSON keys here use snake_case (annotator_id,
// annotator_type) while other types in this package use camelCase keys.
type Annotation struct {
	// Name is the annotation type (e.g., "quality", "relevance").
	Name string `json:"name"`

	// Score is a numeric score (if applicable). Because of omitempty, a
	// score of exactly 0 is dropped from the JSON output and cannot be
	// distinguished from "no score".
	Score float64 `json:"score,omitempty"`

	// Label is a categorical label (if applicable).
	Label string `json:"label,omitempty"`

	// Explanation provides reasoning for the annotation.
	Explanation string `json:"explanation,omitempty"`

	// AnnotatorID identifies who provided this annotation.
	AnnotatorID string `json:"annotator_id,omitempty"`

	// AnnotatorType indicates human vs automated (e.g., "human", "llm", "rule").
	AnnotatorType string `json:"annotator_type,omitempty"`
}

Annotation represents a label or score provided by an annotator, which may be a human or an automated source (see AnnotatorType).

type CategoricalAgreement ¶

// CategoricalAgreement holds agreement statistics for paired categorical
// ratings.
type CategoricalAgreement struct {
	// ExactAgreement is the percentage of exact categorical matches.
	// NOTE(review): scale (0-1 fraction vs 0-100) is not visible here — confirm.
	ExactAgreement float64 `json:"exactAgreement"`

	// ConfusionMatrix shows disagreement patterns.
	// Keys are "rater1_score:rater2_score" (e.g., "pass:partial").
	ConfusionMatrix map[string]int `json:"confusionMatrix"`

	// SampleSize is the number of paired ratings.
	SampleSize int `json:"sampleSize"`
}

CategoricalAgreement holds the agreement statistics between two sets of categorical scores, as produced by ComputeCategoricalAgreement.

func ComputeCategoricalAgreement ¶

func ComputeCategoricalAgreement(results1, results2 []CategoryResult) *CategoricalAgreement

ComputeCategoricalAgreement computes agreement between categorical scores.

type Category ¶

// Category is a single evaluation dimension of a rubric. It is either scored
// directly via Scale or, when Criteria is non-empty, decomposed into weighted
// sub-criteria.
type Category struct {
	// ID uniquely identifies this category within the rubric.
	ID string `json:"id" yaml:"id"`

	// Name is the human-readable category name.
	Name string `json:"name" yaml:"name"`

	// Description explains what this category measures.
	Description string `json:"description" yaml:"description"`

	// Weight is the relative importance (default 1.0).
	// A zero value is omitted from the JSON encoding (omitempty).
	Weight float64 `json:"weight,omitempty" yaml:"weight,omitempty"`

	// Required indicates if this category must pass for overall pass.
	Required bool `json:"required,omitempty" yaml:"required,omitempty"`

	// Scale defines how this category is scored.
	Scale Scale `json:"scale" yaml:"scale"`

	// EvaluationPrompt is a specific prompt for evaluating this category.
	EvaluationPrompt string `json:"evaluationPrompt,omitempty" yaml:"evaluationPrompt,omitempty"`

	// Examples provides few-shot examples for LLM evaluation.
	// Research shows 1 example per level improves LLM alignment.
	Examples *CategoryExamples `json:"examples,omitempty" yaml:"examples,omitempty"`

	// Criteria optionally decomposes this category into weighted sub-criteria,
	// each scored independently at pass/partial/fail with concrete indicators.
	// When present, the category is "composite" (the rich-rubric form) and its
	// score aggregates its criteria by weight. Simple categories omit this and
	// are scored directly via Scale.
	Criteria []Criterion `json:"criteria,omitempty" yaml:"criteria,omitempty"`
}

Category is a single evaluation dimension.

func NewCategory ¶

func NewCategory(id, name, description string) *Category

NewCategory creates a new category with a categorical scale.

func (*Category) AddOption ¶

func (c *Category) AddOption(value, label string, criteria ...string) *Category

AddOption adds a scale option to a categorical category.

func (*Category) GetOptionForValue ¶

func (c *Category) GetOptionForValue(value string) *ScaleOption

GetOptionForValue returns the scale option for a given value.

func (*Category) IsComposite ¶ added in v0.10.0

func (c *Category) IsComposite() bool

IsComposite reports whether the category decomposes into weighted criteria (the rich-rubric form) rather than being scored directly via its Scale.

func (*Category) SetEvaluationPrompt ¶

func (c *Category) SetEvaluationPrompt(prompt string) *Category

SetEvaluationPrompt sets the evaluation prompt for this category.

func (*Category) SetExamples ¶

func (c *Category) SetExamples(examples *CategoryExamples) *Category

SetExamples sets few-shot examples for the category.

func (*Category) SetRequired ¶

func (c *Category) SetRequired(required bool) *Category

SetRequired marks this category as required for pass.

func (*Category) SetWeight ¶

func (c *Category) SetWeight(weight float64) *Category

SetWeight sets the category weight.

func (*Category) WithBinary ¶

func (c *Category) WithBinary(passCriteria, failCriteria []string) *Category

WithBinary sets up a binary pass/fail scale.

func (*Category) WithChecklist ¶

func (c *Category) WithChecklist(required, optional []string, threshold *ChecklistThreshold) *Category

WithChecklist sets up a checklist scale.

func (*Category) WithLikert ¶

func (c *Category) WithLikert(config *LikertConfig) *Category

WithLikert sets up a Likert scale with custom configuration.

func (*Category) WithLikert5 ¶

func (c *Category) WithLikert5(anchors []LikertAnchor) *Category

WithLikert5 sets up a standard 1-5 Likert scale. Default thresholds: 4-5 = pass, 3 = partial, 1-2 = fail.

func (*Category) WithPassPartialFail ¶

func (c *Category) WithPassPartialFail(passCriteria, partialCriteria, failCriteria []string) *Category

WithPassPartialFail sets up a standard pass/partial/fail scale.

type CategoryExamples ¶

// CategoryExamples holds at most one few-shot example per score level.
// Each field is optional; a nil pointer is omitted when serialized.
type CategoryExamples struct {
	// Pass is the example for the pass level.
	Pass    *Example `json:"pass,omitempty" yaml:"pass,omitempty"`
	// Partial is the example for the partial level.
	Partial *Example `json:"partial,omitempty" yaml:"partial,omitempty"`
	// Fail is the example for the fail level.
	Fail    *Example `json:"fail,omitempty" yaml:"fail,omitempty"`
}

CategoryExamples provides few-shot examples for a category. Research shows 1 example per level improves LLM alignment.

type CategoryResult ¶

// CategoryResult is the evaluation result for a single category: its
// categorical score, optional integer/numeric scores and confidence, and the
// reasoning, evidence, and findings behind the score.
type CategoryResult struct {
	// Category is the category ID.
	Category string `json:"category"`

	// Score is the assigned score (pass, partial, fail).
	// This is the authoritative score for decision-making.
	Score ScoreValue `json:"score"`

	// IntScore is the 1-5 integer score.
	// Preferred for LLM judges as they are unreliable at finer granularity.
	IntScore IntegerScore `json:"intScore,omitempty"`

	// NumericScore is an optional numeric score (e.g., 1-5 Likert).
	// Used for human comparison, inter-rater reliability, and calibration.
	// The categorical Score takes precedence for pass/fail decisions.
	// A nil pointer means no numeric score was set.
	NumericScore *float64 `json:"numericScore,omitempty"`

	// Confidence is the evaluator's confidence in this score (0.0-1.0).
	// Low confidence scores may be routed to human review.
	// A value of exactly 0 is omitted from the JSON encoding (omitempty).
	Confidence float64 `json:"confidence,omitempty"`

	// Severity is the highest-severity finding in this category (empty if
	// there are none). Computed automatically — via AddCategoryResult, or
	// as a safety net in Evaluate for categories appended directly to
	// Rubric.Categories — so it can't drift from Findings; it is not an
	// independent judgment. Exists for prioritizing which categories to
	// fix first, distinct from Score/IntScore, which measure quality.
	Severity Severity `json:"severity,omitempty"`

	// ReasonCodes are standardized finding identifiers for this category.
	// Enable automated repair workflows.
	ReasonCodes []ReasonCode `json:"reasonCodes,omitempty"`

	// Reasoning explains the score (chain-of-thought).
	// Always serialized, even when empty (no omitempty).
	Reasoning string `json:"reasoning"`

	// Evidence are specific quotes or observations.
	Evidence []string `json:"evidence,omitempty"`

	// Findings are issues discovered in this category.
	Findings []Finding `json:"findings,omitempty"`

	// ChecklistResults tracks checklist items (for checklist scales).
	ChecklistResults *ChecklistResults `json:"checklistResults,omitempty"`
}

CategoryResult is the evaluation result for a single category.

func NewCategoryResult ¶

func NewCategoryResult(category string, score ScoreValue, reasoning string) *CategoryResult

NewCategoryResult creates a category result with the given score.

func NewCategoryResultFromLikert ¶

func NewCategoryResultFromLikert(category string, likertScore int, config *LikertConfig, reasoning string) *CategoryResult

NewCategoryResultFromLikert creates a category result from a Likert score. The categorical score is derived from the numeric score using the config thresholds.

func NewCategoryResultWithIntScore ¶ added in v0.9.0

func NewCategoryResultWithIntScore(category string, intScore IntegerScore, confidence float64, reasoning string) *CategoryResult

NewCategoryResultWithIntScore creates a category result from an integer score.

func NewCategoryResultWithNumeric ¶

func NewCategoryResultWithNumeric(category string, score ScoreValue, numericScore float64, reasoning string) *CategoryResult

NewCategoryResultWithNumeric creates a category result with both categorical and numeric scores. The numeric score is used for human comparison; categorical score is authoritative for decisions.

func (*CategoryResult) AddEvidence ¶

func (cr *CategoryResult) AddEvidence(evidence ...string) *CategoryResult

AddEvidence adds evidence to the result.

func (*CategoryResult) AddFinding ¶

func (cr *CategoryResult) AddFinding(f Finding) *CategoryResult

AddFinding adds a finding to the result and recomputes Severity from the updated Findings, so it never drifts out of sync with what's actually been added.

func (*CategoryResult) AddReasonCode ¶ added in v0.9.0

func (cr *CategoryResult) AddReasonCode(code ReasonCode) *CategoryResult

AddReasonCode adds a reason code to this category result.

func (*CategoryResult) AddReasonCodes ¶ added in v0.9.0

func (cr *CategoryResult) AddReasonCodes(codes ...ReasonCode) *CategoryResult

AddReasonCodes adds multiple reason codes to this category result.

func (*CategoryResult) GetNumericScore ¶

func (cr *CategoryResult) GetNumericScore() float64

GetNumericScore returns the numeric score, or 0 if not set.

func (*CategoryResult) HasLowConfidence ¶ added in v0.9.0

func (cr *CategoryResult) HasLowConfidence(threshold ...float64) bool

HasLowConfidence returns true if confidence is below the threshold (default 0.7).

func (*CategoryResult) HasNumericScore ¶

func (cr *CategoryResult) HasNumericScore() bool

HasNumericScore returns true if a numeric score is set.

func (*CategoryResult) IsPassing ¶

func (cr *CategoryResult) IsPassing() bool

IsPassing returns true if this category passed.

func (*CategoryResult) SetChecklistResults ¶

func (cr *CategoryResult) SetChecklistResults(results *ChecklistResults) *CategoryResult

SetChecklistResults sets the checklist results.

func (*CategoryResult) SetConfidence ¶ added in v0.9.0

func (cr *CategoryResult) SetConfidence(confidence float64) *CategoryResult

SetConfidence sets the confidence value.

func (*CategoryResult) SetIntScore ¶ added in v0.9.0

func (cr *CategoryResult) SetIntScore(score IntegerScore) *CategoryResult

SetIntScore sets the integer score and derives the categorical score.

func (*CategoryResult) SetNumericScore ¶

func (cr *CategoryResult) SetNumericScore(score float64) *CategoryResult

SetNumericScore sets the numeric score.

type CategoryResultCounts ¶

// CategoryResultCounts holds the number of category results per score value.
type CategoryResultCounts struct {
	// Pass is the number of results scored pass.
	Pass    int `json:"pass"`
	// Partial is the number of results scored partial.
	Partial int `json:"partial"`
	// Fail is the number of results scored fail.
	Fail    int `json:"fail"`
	// Total is the overall number of results counted.
	// NOTE(review): presumably Pass + Partial + Fail — confirm against CountResults.
	Total   int `json:"total"`
}

CategoryResultCounts holds counts of category results by score value.

func CountResults ¶

func CountResults(results []CategoryResult) CategoryResultCounts

CountResults counts category results by score.

func (CategoryResultCounts) AllPassing ¶

func (c CategoryResultCounts) AllPassing() bool

AllPassing returns true if all results are passing.

type ChecklistResults ¶

// ChecklistResults records, for a checklist scale, which required and
// optional items were found and which were not. Empty lists are omitted
// when serialized.
type ChecklistResults struct {
	// RequiredPresent are required items that were found.
	RequiredPresent []string `json:"requiredPresent,omitempty"`

	// RequiredMissing are required items that were not found.
	RequiredMissing []string `json:"requiredMissing,omitempty"`

	// OptionalPresent are optional items that were found.
	OptionalPresent []string `json:"optionalPresent,omitempty"`

	// OptionalMissing are optional items that were not found.
	OptionalMissing []string `json:"optionalMissing,omitempty"`
}

ChecklistResults tracks which items were found for checklist scales.

type ChecklistThreshold ¶

// ChecklistThreshold defines the pass criteria for a checklist scale.
type ChecklistThreshold struct {
	// Required is "all" or a number of required items that must be present.
	// The field is a string, so a count is carried as text.
	// NOTE(review): presumably a decimal string such as "2" — confirm the
	// accepted format against the code that parses it.
	Required string `json:"required,omitempty" yaml:"required,omitempty"`

	// Optional is the minimum number of optional items needed.
	Optional int `json:"optional,omitempty" yaml:"optional,omitempty"`
}

ChecklistThreshold defines pass criteria for checklist scales.

type CoverageReport ¶ added in v0.9.0

// CoverageReport groups named coverage sections together with an overall
// coverage percentage.
type CoverageReport struct {
	// Sections contains coverage for each named section.
	// Example keys: "components", "foundations", "functions", "lines"
	Sections map[string]CoverageSection `json:"sections"`

	// Overall is the aggregate coverage percentage (0-100).
	Overall int `json:"overall"`
}

CoverageReport aggregates coverage across multiple sections.

func NewCoverageReport ¶ added in v0.9.0

func NewCoverageReport() *CoverageReport

NewCoverageReport creates an empty coverage report.

func (*CoverageReport) AddSection ¶ added in v0.9.0

func (cr *CoverageReport) AddSection(name string, section CoverageSection)

AddSection adds a coverage section to the report.

func (*CoverageReport) AllComplete ¶ added in v0.9.0

func (cr *CoverageReport) AllComplete() bool

AllComplete returns true if all sections have 100% coverage.

func (*CoverageReport) ComputeOverall ¶ added in v0.9.0

func (cr *CoverageReport) ComputeOverall() int

ComputeOverall calculates the overall coverage percentage. Uses a simple average of all section percentages.

func (*CoverageReport) ComputeOverallWeighted ¶ added in v0.9.0

func (cr *CoverageReport) ComputeOverallWeighted(weights map[string]float64) int

ComputeOverallWeighted calculates overall coverage with weights. The weights map should have keys matching section names. Sections not in the weights map get weight 1.0.

func (*CoverageReport) GetSection ¶ added in v0.9.0

func (cr *CoverageReport) GetSection(name string) CoverageSection

GetSection retrieves a section by name. Returns an empty section if not found.

func (*CoverageReport) HasSection ¶ added in v0.9.0

func (cr *CoverageReport) HasSection(name string) bool

HasSection checks if a section exists.

func (*CoverageReport) MeetsThreshold ¶ added in v0.9.0

func (cr *CoverageReport) MeetsThreshold(threshold int) bool

MeetsThreshold returns true if overall coverage meets the threshold.

func (*CoverageReport) SectionsAboveThreshold ¶ added in v0.9.0

func (cr *CoverageReport) SectionsAboveThreshold(threshold int) []string

SectionsAboveThreshold returns section names with coverage >= threshold.

func (*CoverageReport) SectionsBelowThreshold ¶ added in v0.9.0

func (cr *CoverageReport) SectionsBelowThreshold(threshold int) []string

SectionsBelowThreshold returns section names with coverage < threshold.

func (*CoverageReport) SetSection ¶ added in v0.9.0

func (cr *CoverageReport) SetSection(name string, total, complete int, missing []string) *CoverageReport

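SetSection is a fluent variant of AddSection: it takes the section's total, complete, and missing values directly and returns the report so calls can be chained.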

type CoverageSection ¶ added in v0.9.0

// CoverageSection holds coverage metrics for a single section/area: item
// totals, the resulting percentage, and the items still missing.
type CoverageSection struct {
	// Total is the total number of items in this section.
	Total int `json:"total"`

	// Complete is the number of items that are complete/covered.
	Complete int `json:"complete"`

	// Percentage is the coverage percentage (0-100).
	// NOTE(review): how this is derived from Complete/Total (and rounded)
	// is not visible here — confirm.
	Percentage int `json:"percentage"`

	// Missing lists the IDs or names of missing/incomplete items.
	Missing []string `json:"missing,omitempty"`
}

CoverageSection represents coverage metrics for a single section/area. This is a generic type that can be used for:

  - Spec coverage (components, foundations, patterns)
  - Code coverage (functions, lines, branches)
  - Test coverage (scenarios, edge cases)
  - Documentation coverage (API docs, guides)

type Criterion ¶ added in v0.10.0

type Criterion struct {
	// ID uniquely identifies this criterion within its category.
	ID string `json:"id,omitempty" yaml:"id,omitempty"`

	// Name is the human-readable criterion name.
	Name string `json:"name" yaml:"name"`

	// Weight is the relative importance within the category (default 1.0).
	Weight float64 `json:"weight,omitempty" yaml:"weight,omitempty"`

	// Pass, Partial, and Fail describe the scoring bands for this criterion.
	// Pass and Fail are always serialized; Partial is tagged omitempty.
	// NOTE(review): Partial is a struct value, and encoding/json's omitempty
	// never treats a struct as empty, so an unset Partial is still emitted in
	// JSON as an empty object. YAML behavior depends on the YAML library in
	// use — confirm.
	Pass    CriterionLevel `json:"pass" yaml:"pass"`
	Partial CriterionLevel `json:"partial,omitempty" yaml:"partial,omitempty"`
	Fail    CriterionLevel `json:"fail" yaml:"fail"`
}

Criterion is a weighted, independently scored check within a composite category. Rich rubrics group related criteria under a category so that both the category and each criterion carry a weight.

type CriterionLevel ¶ added in v0.10.0

type CriterionLevel struct {
	// Description explains what this score band means.
	Description string `json:"description,omitempty" yaml:"description,omitempty"`

	// Indicators are concrete signals an evaluator looks for at this band.
	Indicators []string `json:"indicators,omitempty" yaml:"indicators,omitempty"`
}

CriterionLevel is one scoring band for a criterion: what it means and the concrete indicators an evaluator looks for.

type Decision ¶

type Decision struct {
	// Status is the decision outcome.
	Status DecisionStatus `json:"status"`

	// Passed indicates if the evaluation passed.
	Passed bool `json:"passed"`

	// Rationale explains the decision.
	Rationale string `json:"rationale"`

	// FindingCounts summarizes findings by severity.
	FindingCounts FindingCounts `json:"findingCounts"`

	// CategoryCounts summarizes category results.
	CategoryCounts CategoryResultCounts `json:"categoryCounts"`
}

Decision represents the evaluation decision.

func EvaluateResults ¶

func EvaluateResults(results []CategoryResult, findings []Finding, criteria PassCriteria, rubricSet *RubricSet) Decision

EvaluateResults checks category results and findings against criteria.

type DecisionStatus ¶

type DecisionStatus string

DecisionStatus represents the decision outcome.

const (
	DecisionPass        DecisionStatus = "pass"         // Meets all criteria
	DecisionConditional DecisionStatus = "conditional"  // Partial scores or non-blocking findings
	DecisionFail        DecisionStatus = "fail"         // Has blocking findings or required categories failed
	DecisionHumanReview DecisionStatus = "human_review" // Requires human judgment
)

type EvaluationType ¶

type EvaluationType string

EvaluationType defines how evaluation is performed.

const (
	// EvaluationTypeAnalytic scores each category independently (recommended for LLM-as-Judge).
	EvaluationTypeAnalytic EvaluationType = "analytic"

	// EvaluationTypeHolistic provides a single overall score.
	EvaluationTypeHolistic EvaluationType = "holistic"
)

type Example ¶

type Example struct {
	// Excerpt is example content from a document.
	Excerpt string `json:"excerpt" yaml:"excerpt"`

	// Reasoning explains why this gets this score.
	// Including reasoning improves LLM alignment (chain-of-thought).
	Reasoning string `json:"reasoning" yaml:"reasoning"`
}

Example is a few-shot example for LLM evaluation.

type Finding ¶

type Finding struct {
	// ID is the unique identifier for this finding.
	ID string `json:"id"`

	// Category is the evaluation category this relates to.
	Category string `json:"category"`

	// Code is the standardized reason code for this finding.
	// Enables automated repair workflows.
	Code ReasonCode `json:"code,omitempty"`

	// Severity indicates the impact level.
	Severity Severity `json:"severity"`

	// Title is a brief summary of the finding.
	Title string `json:"title"`

	// Description provides detailed explanation.
	Description string `json:"description"`

	// Recommendation explains how to fix the issue.
	Recommendation string `json:"recommendation"`

	// Location is a reference to where the issue was found (e.g., "REQ-12", "Section 3.2").
	Location string `json:"location,omitempty"`

	// Evidence provides specific examples or references.
	Evidence string `json:"evidence,omitempty"`

	// Owner suggests who should address this finding.
	Owner string `json:"owner,omitempty"`

	// Effort estimates the work required (low, medium, high).
	Effort string `json:"effort,omitempty"`
}

Finding represents an issue discovered during evaluation.

func NewFinding ¶ added in v0.9.0

func NewFinding(id, category string, severity Severity, title, description string) *Finding

NewFinding creates a new finding with the required fields.

func NewFindingWithCode ¶ added in v0.9.0

func NewFindingWithCode(id, category string, code ReasonCode, title, description string) *Finding

NewFindingWithCode creates a new finding with a reason code.

func (*Finding) GetCodeInfo ¶ added in v0.9.0

func (f *Finding) GetCodeInfo() *ReasonCodeInfo

GetCodeInfo returns the reason code info for this finding's code.

func (*Finding) GetRepairHint ¶ added in v0.9.0

func (f *Finding) GetRepairHint() string

GetRepairHint returns the repair hint from the reason code registry. Deprecated: Use GetRepairPrompt instead.

func (*Finding) GetRepairPrompt ¶ added in v0.9.0

func (f *Finding) GetRepairPrompt() string

GetRepairPrompt returns the AI repair prompt from the reason code registry.

func (*Finding) IsBlocking ¶

func (f *Finding) IsBlocking() bool

IsBlocking returns true if this finding blocks approval.

func (*Finding) SetCode ¶ added in v0.9.0

func (f *Finding) SetCode(code ReasonCode) *Finding

SetCode sets the reason code on the finding.

func (*Finding) SetEffort ¶ added in v0.9.0

func (f *Finding) SetEffort(effort string) *Finding

SetEffort sets the effort estimate on the finding.

func (*Finding) SetEvidence ¶ added in v0.9.0

func (f *Finding) SetEvidence(evidence string) *Finding

SetEvidence sets the evidence on the finding.

func (*Finding) SetLocation ¶ added in v0.9.0

func (f *Finding) SetLocation(location string) *Finding

SetLocation sets the location reference on the finding.

func (*Finding) SetOwner ¶ added in v0.9.0

func (f *Finding) SetOwner(owner string) *Finding

SetOwner sets the owner on the finding.

func (*Finding) SetRecommendation ¶ added in v0.9.0

func (f *Finding) SetRecommendation(recommendation string) *Finding

SetRecommendation sets the recommendation on the finding.

type FindingCounts ¶

type FindingCounts struct {
	// Critical is the number of findings with critical severity.
	Critical int `json:"critical"`
	// High is the number of findings with high severity.
	High     int `json:"high"`
	// Medium is the number of findings with medium severity.
	Medium   int `json:"medium"`
	// Low is the number of findings with low severity.
	Low      int `json:"low"`
	// Info is the number of informational findings.
	Info     int `json:"info"`
	// Total is the overall number of findings.
	// NOTE(review): presumably the sum of the per-severity counts above — confirm against CountFindings.
	Total    int `json:"total"`
}

FindingCounts tracks the number of findings by severity.

func CountFindings ¶

func CountFindings(findings []Finding) FindingCounts

CountFindings counts findings by severity.

func (FindingCounts) BlockingCount ¶

func (c FindingCounts) BlockingCount() int

BlockingCount returns the number of blocking findings.

func (FindingCounts) HasBlocking ¶

func (c FindingCounts) HasBlocking() bool

HasBlocking returns true if there are any blocking findings.

type FindingLimits ¶

type FindingLimits struct {
	// Critical is the maximum allowed number of critical findings (-1 for unlimited).
	Critical int `json:"critical" yaml:"critical"`
	// High is the maximum allowed number of high findings (-1 for unlimited).
	High     int `json:"high" yaml:"high"`
	// Medium is the maximum allowed number of medium findings (-1 for unlimited).
	Medium   int `json:"medium" yaml:"medium"`
	// Low is the maximum allowed number of low findings (-1 for unlimited).
	// Unlike the other limits this field is tagged omitempty, so a value of 0
	// is left out of JSON output and an absent key decodes to 0.
	// NOTE(review): confirm whether 0 here is meant as "none allowed" or "not set".
	Low      int `json:"low,omitempty" yaml:"low,omitempty"`
}

FindingLimits sets maximum allowed findings per severity. Use -1 for unlimited.

type IRRMetrics ¶

type IRRMetrics struct {
	// ExactAgreement is the percentage of exact score matches.
	ExactAgreement float64 `json:"exactAgreement"`

	// AdjacentAgreement is the percentage within ±1 of each other.
	AdjacentAgreement float64 `json:"adjacentAgreement"`

	// MeanAbsoluteDifference is the average absolute difference.
	MeanAbsoluteDifference float64 `json:"meanAbsoluteDifference"`

	// PearsonCorrelation measures linear correlation (-1 to 1).
	PearsonCorrelation float64 `json:"pearsonCorrelation"`

	// SampleSize is the number of paired ratings.
	SampleSize int `json:"sampleSize"`
}

IRRMetrics contains inter-rater reliability metrics. These metrics are useful when comparing LLM and human ratings.

func ComputeIRR ¶

func ComputeIRR(pairs []RatingPair) *IRRMetrics

ComputeIRR calculates inter-rater reliability metrics from paired ratings.

func ComputeIRRFromResults ¶

func ComputeIRRFromResults(results1, results2 []CategoryResult) *IRRMetrics

ComputeIRRFromResults computes IRR metrics from two sets of category results. Useful for comparing LLM evaluation with human ground truth.

type IntegerScore ¶ added in v0.9.0

type IntegerScore int

IntegerScore represents a 1-5 integer evaluation score. This scale is preferred for LLM judges as research shows they are unreliable at finer granularity than 5 levels.

const (
	// ScoreUnacceptable indicates the spec does not meet requirements.
	ScoreUnacceptable IntegerScore = 1

	// ScoreMajorRevisions indicates significant work is needed.
	ScoreMajorRevisions IntegerScore = 2

	// ScoreAcceptable indicates minimum requirements are met.
	ScoreAcceptable IntegerScore = 3

	// ScoreGood indicates the spec meets expectations well.
	ScoreGood IntegerScore = 4

	// ScoreExcellent indicates the spec exceeds expectations.
	ScoreExcellent IntegerScore = 5
)

func AllIntegerScores ¶ added in v0.9.0

func AllIntegerScores() []IntegerScore

AllIntegerScores returns all valid integer scores in descending order.

func ParseIntegerScore ¶ added in v0.9.0

func ParseIntegerScore(score int) IntegerScore

ParseIntegerScore converts an integer to IntegerScore, clamping to valid range.

func (IntegerScore) Icon ¶ added in v0.9.0

func (s IntegerScore) Icon() string

Icon returns the emoji icon for the score.

func (IntegerScore) IsPassing ¶ added in v0.9.0

func (s IntegerScore) IsPassing() bool

IsPassing returns true if the score is considered passing (4 or higher).

func (IntegerScore) IsValid ¶ added in v0.9.0

func (s IntegerScore) IsValid() bool

IsValid returns true if the score is in the valid 1-5 range.

func (IntegerScore) String ¶ added in v0.9.0

func (s IntegerScore) String() string

String returns the human-readable label for the score.

func (IntegerScore) ToCategorical ¶ added in v0.9.0

func (s IntegerScore) ToCategorical() ScoreValue

ToCategorical converts the integer score to a categorical ScoreValue. The mapping is: 1-2 = fail, 3 = partial, 4-5 = pass.

type JudgeCategoricalScore ¶

type JudgeCategoricalScore struct {
	// JudgeID identifies the judge.
	JudgeID string `json:"judgeId"`

	// Score is the judge's categorical score.
	Score ScoreValue `json:"score"`
}

JudgeCategoricalScore is a categorical score from a specific judge.

type JudgeDisagreement ¶

type JudgeDisagreement struct {
	// Category is the evaluation dimension.
	Category string `json:"category"`

	// Scores are the individual judge scores.
	Scores []JudgeCategoricalScore `json:"scores"`

	// UniqueScores is the number of distinct scores given.
	UniqueScores int `json:"uniqueScores"`
}

JudgeDisagreement captures where judges had significantly different scores.

type JudgeMetadata ¶

type JudgeMetadata struct {
	// JudgeID is a unique identifier for this judge configuration.
	JudgeID string `json:"judge_id,omitempty"`

	// Model is the LLM model used (e.g., "claude-3-opus-20240229", "gpt-4-turbo").
	// This is the only field without omitempty, so it is always serialized.
	Model string `json:"model"`

	// ModelProvider is the API provider (e.g., "anthropic", "openai", "bedrock").
	ModelProvider string `json:"model_provider,omitempty"`

	// ModelVersion is the specific model version if applicable.
	ModelVersion string `json:"model_version,omitempty"`

	// PromptTemplate is the name/ID of the prompt template used.
	PromptTemplate string `json:"prompt_template,omitempty"`

	// PromptVersion is the version of the prompt template.
	PromptVersion string `json:"prompt_version,omitempty"`

	// SystemPrompt is the system prompt used (or hash/reference if too long).
	SystemPrompt string `json:"system_prompt,omitempty"`

	// Temperature is the sampling temperature used.
	// Because of omitempty, a temperature of exactly 0 is omitted from JSON
	// output and is indistinguishable from "not set" after decoding.
	Temperature float64 `json:"temperature,omitempty"`

	// MaxTokens is the max tokens setting.
	MaxTokens int `json:"max_tokens,omitempty"`

	// RubricID references the rubric set used for scoring.
	RubricID string `json:"rubric_id,omitempty"`

	// RubricVersion is the version of the rubric used.
	RubricVersion string `json:"rubric_version,omitempty"`

	// EvaluatedAt is when this evaluation was performed.
	// NOTE(review): time.Time is a struct, and encoding/json's omitempty never
	// treats a struct as empty, so a zero EvaluatedAt is still emitted (as
	// "0001-01-01T00:00:00Z") unless a custom marshaler handles it — confirm.
	EvaluatedAt time.Time `json:"evaluated_at,omitempty"`

	// Latency is the evaluation duration.
	// time.Duration has no JSON marshaler of its own, so with encoding/json
	// this is encoded as an integer number of nanoseconds.
	Latency time.Duration `json:"latency,omitempty"`

	// TokensUsed tracks token consumption.
	TokensUsed *TokenUsage `json:"tokens_used,omitempty"`

	// TraceID links to observability trace (e.g., for Opik/Phoenix/Langfuse).
	TraceID string `json:"trace_id,omitempty"`

	// SpanID links to observability span.
	SpanID string `json:"span_id,omitempty"`
}

JudgeMetadata tracks information about the LLM judge that produced an evaluation. This enables reproducibility, debugging, and comparison of different judge configurations.

func NewJudgeMetadata ¶

func NewJudgeMetadata(model string) *JudgeMetadata

NewJudgeMetadata creates judge metadata with required fields.

func (*JudgeMetadata) SetLatency ¶

func (j *JudgeMetadata) SetLatency(d time.Duration)

SetLatency records the evaluation duration.

func (*JudgeMetadata) WithPrompt ¶

func (j *JudgeMetadata) WithPrompt(template, version string) *JudgeMetadata

WithPrompt sets the prompt template info.

func (*JudgeMetadata) WithProvider ¶

func (j *JudgeMetadata) WithProvider(provider string) *JudgeMetadata

WithProvider sets the model provider.

func (*JudgeMetadata) WithRubric ¶

func (j *JudgeMetadata) WithRubric(id, version string) *JudgeMetadata

WithRubric sets the rubric reference.

func (*JudgeMetadata) WithTemperature ¶

func (j *JudgeMetadata) WithTemperature(temp float64) *JudgeMetadata

WithTemperature sets the sampling temperature.

func (*JudgeMetadata) WithTokenUsage ¶

func (j *JudgeMetadata) WithTokenUsage(input, output int) *JudgeMetadata

WithTokenUsage sets the token usage.

func (*JudgeMetadata) WithTrace ¶

func (j *JudgeMetadata) WithTrace(traceID, spanID string) *JudgeMetadata

WithTrace records the trace ID and span ID that link this evaluation to an observability trace.

type LikertAnchor ¶

type LikertAnchor struct {
	// Value is the numeric score.
	Value int `json:"value" yaml:"value"`

	// Label is the short label (e.g., "Excellent", "Good").
	Label string `json:"label" yaml:"label"`

	// Description explains what this score means.
	Description string `json:"description,omitempty" yaml:"description,omitempty"`
}

LikertAnchor describes what a specific score level means.

func StandardLikert5Anchors ¶

func StandardLikert5Anchors() []LikertAnchor

StandardLikert5Anchors returns standard 1-5 Likert anchors.

type LikertConfig ¶

type LikertConfig struct {
	// Min is the minimum score value (usually 1 or 0).
	Min int `json:"min" yaml:"min"`

	// Max is the maximum score value (usually 5).
	Max int `json:"max" yaml:"max"`

	// Anchors describe what each score level means.
	Anchors []LikertAnchor `json:"anchors,omitempty" yaml:"anchors,omitempty"`

	// PassThreshold is the minimum score for "pass" (default: top 40%).
	// For 1-5 scale, default is 4.
	PassThreshold *int `json:"passThreshold,omitempty" yaml:"passThreshold,omitempty"`

	// PartialThreshold is the minimum score for "partial" (default: middle).
	// For 1-5 scale, default is 3.
	PartialThreshold *int `json:"partialThreshold,omitempty" yaml:"partialThreshold,omitempty"`
}

LikertConfig defines a Likert scale configuration.

type MultiJudgeResult ¶

type MultiJudgeResult struct {
	// Evaluations are the individual judge evaluations.
	Evaluations []*Rubric `json:"evaluations"`

	// Judges contains metadata for each judge.
	Judges []*JudgeMetadata `json:"judges"`

	// AggregatedCategories are the combined category results.
	AggregatedCategories []CategoryResult `json:"aggregatedCategories"`

	// AggregationMethod describes how scores were combined.
	AggregationMethod AggregationMethod `json:"aggregationMethod"`

	// Agreement measures inter-judge agreement (0-1, higher = more agreement).
	Agreement float64 `json:"agreement"`

	// Disagreements lists categories where judges significantly disagreed.
	Disagreements []JudgeDisagreement `json:"disagreements,omitempty"`

	// ConsolidatedDecision is the final decision after aggregation.
	ConsolidatedDecision Decision `json:"consolidatedDecision"`

	// ConsolidatedFindings merges findings from all judges.
	ConsolidatedFindings []Finding `json:"consolidatedFindings"`
}

MultiJudgeResult aggregates evaluations from multiple judges. This improves reliability by combining perspectives and detecting disagreement.

func AggregateEvaluations ¶

func AggregateEvaluations(evaluations []*Rubric, method AggregationMethod) *MultiJudgeResult

AggregateEvaluations combines multiple evaluation reports.

type NextSteps ¶

type NextSteps struct {
	// RerunCommand is the command to re-run evaluation.
	RerunCommand string `json:"rerunCommand,omitempty"`

	// Immediate are blocking actions that must be completed.
	Immediate []ActionItem `json:"immediate,omitempty"`

	// Recommended are suggested improvements.
	Recommended []ActionItem `json:"recommended,omitempty"`
}

NextSteps provides actionable workflow guidance.

type PairwiseCategoryScore ¶

type PairwiseCategoryScore struct {
	// Category is the evaluation dimension.
	Category string `json:"category"`

	// Winner indicates which output won for this category.
	Winner PairwiseWinner `json:"winner"`

	// Margin indicates how much better the winner is (0-1, higher = larger gap).
	Margin float64 `json:"margin,omitempty"`

	// Reasoning explains the category-level comparison.
	Reasoning string `json:"reasoning,omitempty"`
}

PairwiseCategoryScore compares outputs on a specific dimension.

type PairwiseComparison ¶

type PairwiseComparison struct {
	// ID is the unique identifier for this comparison.
	ID string `json:"id,omitempty"`

	// Input is the shared input/prompt for both outputs.
	Input string `json:"input"`

	// OutputA is the first output being compared.
	OutputA string `json:"output_a"`

	// OutputB is the second output being compared.
	OutputB string `json:"output_b"`

	// Winner indicates which output won: one of the PairwiseWinner values
	// "A", "B", "tie", or "uncertain".
	Winner PairwiseWinner `json:"winner"`

	// Confidence is the judge's confidence in the decision (0-1).
	Confidence float64 `json:"confidence,omitempty"`

	// Reasoning explains why this winner was chosen.
	Reasoning string `json:"reasoning"`

	// CategoryScores provides per-category comparisons if applicable.
	CategoryScores []PairwiseCategoryScore `json:"category_scores,omitempty"`

	// Judge contains metadata about the LLM judge.
	Judge *JudgeMetadata `json:"judge,omitempty"`

	// Metadata contains additional comparison context.
	Metadata map[string]any `json:"metadata,omitempty"`

	// CreatedAt is when this comparison was made.
	// NOTE(review): time.Time is a struct, and encoding/json's omitempty never
	// treats a struct as empty, so a zero CreatedAt is still emitted unless a
	// custom marshaler handles it — confirm.
	CreatedAt time.Time `json:"created_at,omitempty"`
}

PairwiseComparison represents a comparison between two outputs. This is an alternative to absolute scoring that can reduce position bias and improve reliability of LLM-as-Judge evaluations.

func NewPairwiseComparison ¶

func NewPairwiseComparison(input, outputA, outputB string) *PairwiseComparison

NewPairwiseComparison creates a new pairwise comparison.

func (*PairwiseComparison) AddCategoryScore ¶

func (p *PairwiseComparison) AddCategoryScore(category string, winner PairwiseWinner, reasoning string, margin float64)

AddCategoryScore adds a per-category comparison.

func (*PairwiseComparison) SetWinner ¶

func (p *PairwiseComparison) SetWinner(winner PairwiseWinner, reasoning string, confidence float64)

SetWinner sets the comparison result.

func (*PairwiseComparison) SwappedComparison ¶

func (p *PairwiseComparison) SwappedComparison() *PairwiseComparison

SwappedComparison creates a comparison with A and B swapped. Running both orders helps detect position bias in the judge.

type PairwiseResult ¶

type PairwiseResult struct {
	// Comparisons are all the individual comparisons.
	Comparisons []PairwiseComparison `json:"comparisons"`

	// WinRateA is the percentage of comparisons won by A.
	WinRateA float64 `json:"win_rate_a"`

	// WinRateB is the percentage of comparisons won by B.
	WinRateB float64 `json:"win_rate_b"`

	// TieRate is the percentage of ties.
	TieRate float64 `json:"tie_rate"`

	// OverallWinner is the aggregated winner.
	OverallWinner PairwiseWinner `json:"overall_winner"`

	// Confidence is the overall confidence in the result.
	Confidence float64 `json:"confidence"`
}

PairwiseResult aggregates multiple pairwise comparisons.

func ComputePairwiseResult ¶

func ComputePairwiseResult(comparisons []PairwiseComparison) *PairwiseResult

ComputePairwiseResult aggregates multiple comparisons into a result.

type PairwiseWinner ¶

type PairwiseWinner string

PairwiseWinner indicates the winner of a pairwise comparison.

const (
	// WinnerA indicates output A is better.
	WinnerA PairwiseWinner = "A"

	// WinnerB indicates output B is better.
	WinnerB PairwiseWinner = "B"

	// WinnerTie indicates both outputs are roughly equal.
	WinnerTie PairwiseWinner = "tie"

	// WinnerUncertain indicates the judge couldn't determine a winner.
	WinnerUncertain PairwiseWinner = "uncertain"
)

type PassCriteria ¶

type PassCriteria struct {
	// MinCategoriesPassing specifies how many categories must pass.
	// Values: "all", "all_required", or a number like "3".
	MinCategoriesPassing string `json:"minCategoriesPassing"`

	// MaxFindings limits findings by severity.
	// Use -1 for unlimited.
	// Note that the JSON key is "maxFindingsSeverity", which differs from the
	// Go field name. A nil pointer is omitted from JSON output.
	MaxFindings *FindingLimits `json:"maxFindingsSeverity,omitempty"`

	// MinIntScore is the minimum overall IntegerScore (1-5) required to pass.
	// If set, the overall score must be >= this value.
	// Use 0 to disable this check.
	// Because of omitempty, the disabling value 0 is omitted from JSON output.
	MinIntScore IntegerScore `json:"minIntScore,omitempty"`
}

PassCriteria defines the requirements for approval. Aligned with LLM-as-Judge best practices.

func DefaultPassCriteria ¶

func DefaultPassCriteria() PassCriteria

DefaultPassCriteria returns standard pass criteria. All required categories must pass, 0 critical/high findings allowed.

func StrictPassCriteria ¶

func StrictPassCriteria() PassCriteria

StrictPassCriteria returns strict pass criteria. All categories must pass, max 3 medium findings.

type RatingPair ¶

type RatingPair struct {
	// Rater1 is the first rater's score (e.g., human).
	Rater1 float64

	// Rater2 is the second rater's score (e.g., LLM).
	Rater2 float64

	// Category is the category being rated.
	Category string

	// ItemID identifies the item being rated.
	ItemID string
}

RatingPair represents a pair of ratings for the same item.

type ReasonCode ¶ added in v0.9.0

type ReasonCode string

ReasonCode is a standardized finding identifier using category prefixes. Format: {CATEGORY}-{ISSUE}. Examples: REQ-AMBIGUOUS, SEC-NO_AUTH.

Reason codes enable automated repair workflows by providing machine-readable categorization of issues that map to specific repair strategies.

const (
	CodeREQAmbiguous     ReasonCode = "REQ-AMBIGUOUS"
	CodeREQNoCriteria    ReasonCode = "REQ-NO_CRITERIA"
	CodeREQConflict      ReasonCode = "REQ-CONFLICT"
	CodeREQIncomplete    ReasonCode = "REQ-INCOMPLETE"
	CodeREQUntestable    ReasonCode = "REQ-UNTESTABLE"
	CodeREQMissingReason ReasonCode = "REQ-MISSING_REASON"
)

Requirements codes (REQ-*)

const (
	CodeMETRICUnmeasurable ReasonCode = "METRIC-UNMEASURABLE"
	CodeMETRICNoBaseline   ReasonCode = "METRIC-NO_BASELINE"
	CodeMETRICNoTarget     ReasonCode = "METRIC-NO_TARGET"
	CodeMETRICUnrealistic  ReasonCode = "METRIC-UNREALISTIC"
	CodeMETRICNoTracking   ReasonCode = "METRIC-NO_TRACKING"
	CodeMETRICMissingKPI   ReasonCode = "METRIC-MISSING_KPI"
	CodeMETRICVanity       ReasonCode = "METRIC-VANITY"
)

Metrics codes (METRIC-*)

const (
	CodeUSERNoPersona      ReasonCode = "USER-NO_PERSONA"
	CodeUSERIncomplete     ReasonCode = "USER-INCOMPLETE"
	CodeUSERNoJourney      ReasonCode = "USER-NO_JOURNEY"
	CodeUSERUnclearProblem ReasonCode = "USER-UNCLEAR_PROBLEM"
	CodeUSERNoGoals        ReasonCode = "USER-NO_GOALS"
	CodeUSERNoPainPoints   ReasonCode = "USER-NO_PAIN_POINTS"
)

User codes (USER-*)

const (
	CodeARCHNoErrorHandling ReasonCode = "ARCH-NO_ERROR_HANDLING"
	CodeARCHNoAPI           ReasonCode = "ARCH-NO_API"
	CodeARCHNoDataModel     ReasonCode = "ARCH-NO_DATA_MODEL"
	CodeARCHMissingDep      ReasonCode = "ARCH-MISSING_DEP"
	CodeARCHGap             ReasonCode = "ARCH-GAP"
	CodeARCHNoInterface     ReasonCode = "ARCH-NO_INTERFACE"
	CodeARCHCircularDep     ReasonCode = "ARCH-CIRCULAR_DEP"
	CodeARCHTightCoupling   ReasonCode = "ARCH-TIGHT_COUPLING"
)

Architecture codes (ARCH-*)

const (
	CodeSECGap             ReasonCode = "SEC-GAP"
	CodeSECNoAuth          ReasonCode = "SEC-NO_AUTH"
	CodeSECNoAuthz         ReasonCode = "SEC-NO_AUTHZ"
	CodeSECPrivacy         ReasonCode = "SEC-PRIVACY"
	CodeSECNoValidation    ReasonCode = "SEC-NO_VALIDATION"
	CodeSECNoEncryption    ReasonCode = "SEC-NO_ENCRYPTION"
	CodeSECHardcodedSecret ReasonCode = "SEC-HARDCODED_SECRET" //nolint:gosec // G101 false positive: this is a reason code identifier, not a credential
	CodeSECInjectionRisk   ReasonCode = "SEC-INJECTION_RISK"
)

Security codes (SEC-*)

const (
	CodeSCALEConcern     ReasonCode = "SCALE-CONCERN"
	CodeSCALEPerformance ReasonCode = "SCALE-PERFORMANCE"
	CodeSCALENoCapacity  ReasonCode = "SCALE-NO_CAPACITY"
	CodeSCALESPOF        ReasonCode = "SCALE-SPOF"
	CodeSCALENoRateLimit ReasonCode = "SCALE-NO_RATE_LIMIT"
	CodeSCALENoCache     ReasonCode = "SCALE-NO_CACHE"
	CodeSCALEBlockingOp  ReasonCode = "SCALE-BLOCKING_OP"
)

Scalability codes (SCALE-*)

const (
	CodeINFRANoDeploy    ReasonCode = "INFRA-NO_DEPLOY"
	CodeINFRANoMonitor   ReasonCode = "INFRA-NO_MONITOR"
	CodeINFRANoAlert     ReasonCode = "INFRA-NO_ALERT"
	CodeINFRANoRecovery  ReasonCode = "INFRA-NO_RECOVERY"
	CodeINFRANoBackup    ReasonCode = "INFRA-NO_BACKUP"
	CodeINFRANoRunbook   ReasonCode = "INFRA-NO_RUNBOOK"
	CodeINFRANoRollback  ReasonCode = "INFRA-NO_ROLLBACK"
	CodeINFRAEnvMismatch ReasonCode = "INFRA-ENV_MISMATCH"
)

Infrastructure codes (INFRA-*)

const (
	CodeDOCInsufficient ReasonCode = "DOC-INSUFFICIENT"
	CodeDOCOutdated     ReasonCode = "DOC-OUTDATED"
	CodeDOCNoDiagram    ReasonCode = "DOC-NO_DIAGRAM"
	CodeDOCNoExamples   ReasonCode = "DOC-NO_EXAMPLES"
	CodeDOCInconsistent ReasonCode = "DOC-INCONSISTENT"
)

Documentation codes (DOC-*)

const (
	CodeSCOPECreep      ReasonCode = "SCOPE-CREEP"
	CodeSCOPEUnbounded  ReasonCode = "SCOPE-UNBOUNDED"
	CodeSCOPENoConstr   ReasonCode = "SCOPE-NO_CONSTRAINTS"
	CodeSCOPENoNonGoals ReasonCode = "SCOPE-NO_NON_GOALS"
	CodeSCOPEMVPUnclear ReasonCode = "SCOPE-MVP_UNCLEAR"
	CodeSCOPENoTimeline ReasonCode = "SCOPE-NO_TIMELINE"
)

Scope codes (SCOPE-*)

const (
	CodeUXNoARIA        ReasonCode = "UX-NO_ARIA"
	CodeUXNoErrorState  ReasonCode = "UX-NO_ERROR_STATE"
	CodeUXNoLoading     ReasonCode = "UX-NO_LOADING"
	CodeUXNoEmpty       ReasonCode = "UX-NO_EMPTY"
	CodeUXNoResponsive  ReasonCode = "UX-NO_RESPONSIVE"
	CodeUXNoKeyboard    ReasonCode = "UX-NO_KEYBOARD"
	CodeUXIncompleteNav ReasonCode = "UX-INCOMPLETE_NAV"
	CodeUXNoFeedback    ReasonCode = "UX-NO_FEEDBACK"
)

UX codes (UX-*)

const (
	CodeOther ReasonCode = "OTHER"
)

Generic codes

func GetBlockingCodes ¶ added in v0.9.0

func GetBlockingCodes(findings []Finding) []ReasonCode

GetBlockingCodes returns the reason codes from blocking findings.

func GetReasonCodesByCategory ¶ added in v0.9.0

func GetReasonCodesByCategory(category string) []ReasonCode

GetReasonCodesByCategory returns all reason codes in a category.

func GetReasonCodesBySpecType ¶ added in v0.9.0

func GetReasonCodesBySpecType(specType string) []ReasonCode

GetReasonCodesBySpecType returns reason codes applicable to a spec type.

func NormalizeCode ¶ added in v0.9.0

func NormalizeCode(code ReasonCode) ReasonCode

NormalizeCode converts legacy codes to new format.

type ReasonCodeInfo ¶ added in v0.9.0

type ReasonCodeInfo struct {
	// Code is the reason code identifier.
	Code ReasonCode `json:"code"`

	// Category is the prefix category (REQ, SEC, ARCH, etc.).
	Category string `json:"category"`

	// Description explains what this code means.
	Description string `json:"description"`

	// DefaultSeverity is the typical severity for this issue.
	DefaultSeverity Severity `json:"defaultSeverity"`

	// RepairPrompt is the AI prompt for automated repair.
	// This should be a clear instruction that an LLM can follow
	// to fix the issue in the spec document.
	RepairPrompt string `json:"repairPrompt"`

	// RequiresHuman indicates if human review is needed after AI repair.
	// True for security-critical, business-critical, or subjective issues.
	RequiresHuman bool `json:"requiresHuman"`

	// SpecTypes lists which spec types this code applies to.
	SpecTypes []string `json:"specTypes,omitempty"`
}

ReasonCodeInfo provides metadata about a reason code including information needed for AI-assisted automated repair.

func GetReasonCodeInfo ¶ added in v0.9.0

func GetReasonCodeInfo(code ReasonCode) *ReasonCodeInfo

GetReasonCodeInfo returns the info for a reason code, or nil if not found. Handles both legacy and new code formats.

type ReferenceData ¶

type ReferenceData struct {
	// ID is the unique identifier for this reference.
	ID string `json:"id,omitempty"`

	// Input is the input/prompt that produced the reference output.
	Input string `json:"input,omitempty"`

	// ExpectedOutput is the gold/reference output.
	ExpectedOutput string `json:"expected_output,omitempty"`

	// ExpectedOutputs allows multiple acceptable outputs.
	ExpectedOutputs []string `json:"expected_outputs,omitempty"`

	// Context provides additional context (e.g., retrieved documents for RAG).
	Context []string `json:"context,omitempty"`

	// Annotations are human-provided labels or scores.
	Annotations []Annotation `json:"annotations,omitempty"`

	// Source indicates where this reference came from.
	Source string `json:"source,omitempty"`

	// Tags categorize or filter references.
	Tags []string `json:"tags,omitempty"`

	// Metadata contains additional reference data.
	Metadata map[string]any `json:"metadata,omitempty"`
}

ReferenceData contains ground truth or expected data for evaluation. This enables reference-based evaluation where outputs are compared against known-good examples.

func NewReferenceData ¶

func NewReferenceData(input, expectedOutput string) *ReferenceData

NewReferenceData creates a new reference data item.

func (*ReferenceData) WithAnnotation ¶

func (r *ReferenceData) WithAnnotation(name string, score float64, annotatorID string) *ReferenceData

WithAnnotation adds a human annotation.

func (*ReferenceData) WithContext ¶

func (r *ReferenceData) WithContext(ctx ...string) *ReferenceData

WithContext adds context documents.

type ReferenceDataset ¶

type ReferenceDataset struct {
	// ID is the unique identifier for this dataset.
	ID string `json:"id"`

	// Name is the display name.
	Name string `json:"name"`

	// Description explains what this dataset contains.
	Description string `json:"description,omitempty"`

	// Version tracks dataset iterations.
	Version string `json:"version,omitempty"`

	// Items are the reference data items.
	Items []ReferenceData `json:"items"`

	// Tags categorize the dataset.
	Tags []string `json:"tags,omitempty"`

	// Metadata contains additional dataset info.
	Metadata map[string]any `json:"metadata,omitempty"`
}

ReferenceDataset is a collection of reference data items.

func NewReferenceDataset ¶

func NewReferenceDataset(id, name string) *ReferenceDataset

NewReferenceDataset creates a new reference dataset.

func (*ReferenceDataset) AddItem ¶

func (d *ReferenceDataset) AddItem(item ReferenceData)

AddItem adds a reference data item to the dataset.

func (*ReferenceDataset) GetByID ¶

func (d *ReferenceDataset) GetByID(id string) *ReferenceData

GetByID retrieves a reference item by ID.

type ReportMetadata ¶

// ReportMetadata identifies the document under evaluation and records who
// or what produced the report, and when.
//
// Document and GeneratedAt are always written to JSON; every other field is
// omitted when empty.
type ReportMetadata struct {
	// Document is the filename or path being evaluated.
	Document string `json:"document"`

	// DocumentID is the document identifier (e.g., PRD ID).
	DocumentID string `json:"documentId,omitempty"`

	// DocumentTitle is the document title.
	DocumentTitle string `json:"documentTitle,omitempty"`

	// DocumentVersion is the document version.
	DocumentVersion string `json:"documentVersion,omitempty"`

	// GeneratedAt is when the report was created.
	// It has no omitempty, so the zero time.Time is still serialized.
	GeneratedAt time.Time `json:"generatedAt"`

	// GeneratedBy identifies what created this report.
	GeneratedBy string `json:"generatedBy,omitempty"`

	// ReviewerID identifies the reviewer (agent or human).
	ReviewerID string `json:"reviewerId,omitempty"`
}

ReportMetadata contains report identification.

type Rubric ¶

// Rubric is a rubric-based evaluation report: identification metadata,
// per-category results, findings, and the resulting decision.
//
// NOTE(review): Pass, Decision, and OverallDecision all describe the
// outcome; which of them is authoritative is not visible from this
// declaration — confirm against Evaluate/Finalize.
type Rubric struct {
	// Schema is the JSON Schema URL.
	Schema string `json:"$schema,omitempty"`

	// SchemaVersion is the evaluation schema version (e.g., "v2").
	// Used for backwards compatibility.
	SchemaVersion string `json:"schemaVersion,omitempty"`

	// Metadata contains report identification and audit info.
	Metadata ReportMetadata `json:"metadata"`

	// ReviewType identifies the type of review (prd, arb, security, article, etc.).
	ReviewType string `json:"reviewType"`

	// Judge contains metadata about the LLM judge.
	// Nil when no judge metadata has been recorded; omitted from JSON in that case.
	Judge *JudgeMetadata `json:"judge,omitempty"`

	// RubricID references the rubric used for scoring.
	RubricID string `json:"rubricId,omitempty"`

	// RubricVersion is the version of the rubric used.
	RubricVersion string `json:"rubricVersion,omitempty"`

	// Reference contains gold/expected data for comparison.
	// Nil when the evaluation is not reference-based.
	Reference *ReferenceData `json:"reference,omitempty"`

	// IntScore is the overall 1-5 integer score.
	// Preferred for LLM judges as they are unreliable at finer granularity.
	// Presumably an integer-kinded type, in which case the zero value is
	// omitted from JSON — TODO confirm against the IntegerScore definition.
	IntScore IntegerScore `json:"intScore,omitempty"`

	// Confidence is the overall confidence in the evaluation (0.0-1.0).
	// Low confidence evaluations may be routed to human review.
	// Because of omitempty, a confidence of exactly 0.0 is not serialized
	// and is indistinguishable from "not set" after a JSON round trip.
	Confidence float64 `json:"confidence,omitempty"`

	// Pass is an explicit pass/fail gate, orthogonal to score.
	// A spec can have a high score but still fail due to blocking issues.
	// Always serialized (no omitempty), so false is written explicitly.
	Pass bool `json:"pass"`

	// Blocking contains reason codes that caused failure.
	// Empty if Pass is true.
	Blocking []ReasonCode `json:"blocking,omitempty"`

	// Categories contains results for each evaluation dimension.
	// No omitempty: a nil slice is encoded as JSON null.
	Categories []CategoryResult `json:"categories"`

	// Findings are all issues discovered during evaluation.
	// No omitempty: a nil slice is encoded as JSON null.
	Findings []Finding `json:"findings"`

	// PassCriteria defines the requirements for approval.
	PassCriteria PassCriteria `json:"passCriteria"`

	// Decision is the evaluation outcome.
	Decision Decision `json:"decision"`

	// OverallDecision is a simplified pass/conditional/fail status.
	OverallDecision string `json:"overallDecision"`

	// NextSteps provides actionable guidance.
	NextSteps NextSteps `json:"nextSteps"`

	// Summary is the overall assessment.
	Summary string `json:"summary"`

	// Extensions contains domain-specific metadata.
	// Use this to store custom data without modifying the core schema.
	// Example: {"coverage": {...}, "metrics": {...}}
	// Values are untyped (any); readers must type-assert what they retrieve.
	Extensions map[string]any `json:"extensions,omitempty"`
}

Rubric is the detailed rubric-based evaluation report for LLM-as-Judge reviews.

func NewRubric ¶

func NewRubric(reviewType, document string) *Rubric

NewRubric creates a new rubric-based evaluation report.

func (*Rubric) AddBlocking ¶ added in v0.9.0

func (r *Rubric) AddBlocking(code ReasonCode) *Rubric

AddBlocking adds a blocking reason code.

func (*Rubric) AddCategoryResult ¶

func (r *Rubric) AddCategoryResult(cr CategoryResult)

AddCategoryResult adds a category result.

func (*Rubric) AddFinding ¶

func (r *Rubric) AddFinding(f Finding)

AddFinding adds a finding.

func (*Rubric) CollectBlockingCodes ¶ added in v0.9.0

func (r *Rubric) CollectBlockingCodes() []ReasonCode

CollectBlockingCodes gathers all blocking reason codes from findings.

func (*Rubric) ComputeOverallConfidence ¶ added in v0.9.0

func (r *Rubric) ComputeOverallConfidence() float64

ComputeOverallConfidence calculates the overall confidence from the category confidences. It uses the minimum confidence across all categories, so the result is only as strong as the weakest link.

func (*Rubric) ComputeOverallIntScore ¶ added in v0.9.0

func (r *Rubric) ComputeOverallIntScore(rubricSet *RubricSet) IntegerScore

ComputeOverallIntScore calculates the overall integer score from category scores. Uses weighted average of category IntScores.

func (*Rubric) Evaluate ¶

func (r *Rubric) Evaluate(rubricSet *RubricSet) Decision

Evaluate computes the decision based on findings and category results.

func (*Rubric) Finalize ¶

func (r *Rubric) Finalize(rubricSet *RubricSet, rerunCommand string)

Finalize computes all derived fields.

func (*Rubric) GenerateNextSteps ¶

func (r *Rubric) GenerateNextSteps(rerunCommand string)

GenerateNextSteps creates actionable next steps.

func (*Rubric) GenerateSummary ¶

func (r *Rubric) GenerateSummary() string

GenerateSummary creates the summary text.

func (*Rubric) GetCategoryResult ¶

func (r *Rubric) GetCategoryResult(categoryID string) *CategoryResult

GetCategoryResult returns a category result by ID, or nil if not found.

func (*Rubric) GetCoverage ¶ added in v0.9.0

func (r *Rubric) GetCoverage() *CoverageReport

GetCoverage retrieves coverage data from a Rubric's extensions. Returns nil if not set or if type assertion fails.

func (*Rubric) GetExtension ¶ added in v0.9.0

func (r *Rubric) GetExtension(key string) any

GetExtension retrieves an extension value by key. Returns nil if not found.

func (*Rubric) HasExtension ¶ added in v0.9.0

func (r *Rubric) HasExtension(key string) bool

HasExtension checks if an extension exists.

func (*Rubric) HasLowConfidence ¶ added in v0.9.0

func (r *Rubric) HasLowConfidence(threshold ...float64) bool

HasLowConfidence returns true if confidence is below the threshold (default 0.7).

func (*Rubric) IsV2 ¶ added in v0.9.0

func (r *Rubric) IsV2() bool

IsV2 returns true if this is a v2 schema evaluation.

func (*Rubric) NeedsHumanReview ¶ added in v0.9.0

func (r *Rubric) NeedsHumanReview(confidenceThreshold ...float64) bool

NeedsHumanReview returns true if this evaluation should be reviewed by a human.

func (*Rubric) SetBlocking ¶ added in v0.9.0

func (r *Rubric) SetBlocking(codes []ReasonCode) *Rubric

SetBlocking sets the blocking reason codes.

func (*Rubric) SetConfidence ¶ added in v0.9.0

func (r *Rubric) SetConfidence(confidence float64) *Rubric

SetConfidence sets the overall confidence value.

func (*Rubric) SetCoverage ¶ added in v0.9.0

func (r *Rubric) SetCoverage(coverage *CoverageReport)

SetCoverage is a convenience method to set coverage on a Rubric.

func (*Rubric) SetExtension ¶ added in v0.9.0

func (r *Rubric) SetExtension(key string, value any)

SetExtension sets a single extension value.

func (*Rubric) SetIntScore ¶ added in v0.9.0

func (r *Rubric) SetIntScore(score IntegerScore) *Rubric

SetIntScore sets the overall integer score.

func (*Rubric) SetJudge ¶

func (r *Rubric) SetJudge(judge *JudgeMetadata)

SetJudge sets the judge metadata.

func (*Rubric) SetPass ¶ added in v0.9.0

func (r *Rubric) SetPass(pass bool) *Rubric

SetPass sets the pass/fail status.

func (*Rubric) SetPassCriteria ¶

func (r *Rubric) SetPassCriteria(criteria PassCriteria)

SetPassCriteria sets the pass criteria.

func (*Rubric) SetReference ¶

func (r *Rubric) SetReference(ref *ReferenceData)

SetReference sets the reference data for comparison.

func (*Rubric) SetRubricInfo ¶

func (r *Rubric) SetRubricInfo(rubricID, rubricVersion string)

SetRubricInfo sets the rubric ID and version.

type RubricMetadata ¶

// RubricMetadata carries optional descriptive information about a rubric.
// Every field is omitted from both JSON and YAML output when empty.
type RubricMetadata struct {
	// CreatedAt is kept as a plain string rather than a time.Time, so no
	// format is enforced by this type (presumably a date or timestamp —
	// TODO confirm the expected layout). Author names the rubric's author.
	// BasedOn lists the sources this rubric is based on (presumably other
	// rubric IDs or references — verify against callers).
	CreatedAt string   `json:"createdAt,omitempty" yaml:"createdAt,omitempty"`
	Author    string   `json:"author,omitempty" yaml:"author,omitempty"`
	BasedOn   []string `json:"basedOn,omitempty" yaml:"basedOn,omitempty"`
}

RubricMetadata contains additional rubric information.

type RubricPassCriteria ¶

// RubricPassCriteria describes what an evaluation must satisfy to pass
// overall. All fields are optional and omitted from JSON/YAML when unset.
type RubricPassCriteria struct {
	// MinCategoriesPassing is "all", "all_required", or a number.
	// The field is a string, so a numeric threshold is carried as text
	// (e.g. "3") and must be parsed by the consumer.
	MinCategoriesPassing string `json:"minCategoriesPassing,omitempty" yaml:"minCategoriesPassing,omitempty"`

	// MaxFindings limits findings by severity.
	// Note that the serialized key is "maxFindingsSeverity", which differs
	// from the Go field name. Nil means no limits are configured.
	MaxFindings *FindingLimits `json:"maxFindingsSeverity,omitempty" yaml:"maxFindingsSeverity,omitempty"`

	// ScoreThresholds optionally sets numeric pass/partial cutoffs (0-100) for
	// weighted-score rubrics (the rich form, where categories and criteria carry
	// weights and the overall score is a weighted roll-up).
	// Nil means no numeric thresholds are configured.
	ScoreThresholds *ScoreThresholds `json:"scoreThresholds,omitempty" yaml:"scoreThresholds,omitempty"`
}

RubricPassCriteria defines requirements for overall pass/fail determination.

type RubricSet ¶

// RubricSet is a rubric definition: an identified, versioned list of
// evaluation categories together with the criteria for passing overall.
// It can be serialized to both JSON and YAML; ID, Name, Version,
// PassCriteria, and Categories are always written, the rest only when set.
type RubricSet struct {
	// ID uniquely identifies this rubric set.
	ID string `json:"id" yaml:"id"`

	// Name is the human-readable name.
	Name string `json:"name" yaml:"name"`

	// Version is the semantic version of this rubric.
	Version string `json:"version" yaml:"version"`

	// Description explains what this rubric set evaluates.
	Description string `json:"description,omitempty" yaml:"description,omitempty"`

	// EvaluationType is "analytic" (per-category) or "holistic" (single score).
	// Analytic is recommended for LLM-as-Judge.
	EvaluationType EvaluationType `json:"evaluationType,omitempty" yaml:"evaluationType,omitempty"`

	// PassCriteria defines requirements for overall pass/fail.
	PassCriteria RubricPassCriteria `json:"passCriteria" yaml:"passCriteria"`

	// Categories are the evaluation dimensions.
	// Stored by value; a nil slice is encoded as JSON null (no omitempty).
	Categories []Category `json:"categories" yaml:"categories"`

	// JudgePromptTemplate is the prompt template for LLM evaluation.
	// Supports placeholders: {content}, {categories}, etc.
	JudgePromptTemplate string `json:"judgePromptTemplate,omitempty" yaml:"judgePromptTemplate,omitempty"`

	// Metadata contains additional information about the rubric.
	// Nil when no metadata has been attached.
	Metadata *RubricMetadata `json:"metadata,omitempty" yaml:"metadata,omitempty"`
}

RubricSet is a collection of rubrics for a complete evaluation. It follows a Go-first principle: the Go types are the source of truth, and the JSON Schema is generated from them.

func NewRubricSet ¶

func NewRubricSet(id, name, version string) *RubricSet

NewRubricSet creates a new rubric set with required fields.

func (*RubricSet) AddCategory ¶

func (rs *RubricSet) AddCategory(cat Category) *RubricSet

AddCategory adds a category to the rubric set.

func (*RubricSet) GetCategory ¶

func (rs *RubricSet) GetCategory(id string) *Category

GetCategory returns a category by ID, or nil if not found.

func (*RubricSet) GetRequiredCategories ¶

func (rs *RubricSet) GetRequiredCategories() []Category

GetRequiredCategories returns all required categories.

func (*RubricSet) SetJudgePrompt ¶

func (rs *RubricSet) SetJudgePrompt(template string) *RubricSet

SetJudgePrompt sets the judge prompt template.

func (*RubricSet) SetMetadata ¶

func (rs *RubricSet) SetMetadata(meta *RubricMetadata) *RubricSet

SetMetadata sets the rubric metadata.

func (*RubricSet) SetPassCriteria ¶

func (rs *RubricSet) SetPassCriteria(criteria RubricPassCriteria) *RubricSet

SetPassCriteria sets the pass criteria.

func (*RubricSet) ToJSON ¶

func (rs *RubricSet) ToJSON() ([]byte, error)

ToJSON serializes a rubric set to JSON.

func (*RubricSet) Validate ¶

func (rs *RubricSet) Validate() []string

Validate checks the rubric for common issues.

type Scale ¶

// Scale describes how a category is scored. Type selects the scale kind;
// the remaining fields are each documented as applying to one kind and are
// omitted from JSON/YAML when empty.
type Scale struct {
	// Type is "categorical", "checklist", "binary", or "likert".
	// Categorical with 2-3 options is recommended for LLM-as-Judge.
	// Likert is better for human comparison studies.
	Type ScaleType `json:"type" yaml:"type"`

	// Options are the scoring options (for categorical scales).
	Options []ScaleOption `json:"options,omitempty" yaml:"options,omitempty"`

	// RequiredItems are items that must be present (for checklist scales).
	RequiredItems []string `json:"requiredItems,omitempty" yaml:"requiredItems,omitempty"`

	// OptionalItems are items that add value (for checklist scales).
	OptionalItems []string `json:"optionalItems,omitempty" yaml:"optionalItems,omitempty"`

	// PassingThreshold defines pass criteria (for checklist scales).
	// Nil when no threshold is configured.
	PassingThreshold *ChecklistThreshold `json:"passingThreshold,omitempty" yaml:"passingThreshold,omitempty"`

	// LikertConfig defines the likert scale (for likert scales).
	// Nil when the scale is not a likert scale or no config was supplied.
	LikertConfig *LikertConfig `json:"likertConfig,omitempty" yaml:"likertConfig,omitempty"`
}

Scale defines the scoring mechanism for a category.

type ScaleOption ¶

// ScaleOption is one selectable level of a categorical scale: a
// machine-readable value, a display label, and the criteria for choosing it.
// All three fields are always serialized.
type ScaleOption struct {
	// Value is the machine-readable value (e.g., "pass", "partial", "fail").
	Value string `json:"value" yaml:"value"`

	// Label is the human-readable label.
	Label string `json:"label" yaml:"label"`

	// Criteria are specific requirements for this score level.
	// The tag has no omitempty, so a nil slice is written as JSON null.
	Criteria []string `json:"criteria" yaml:"criteria"`
}

ScaleOption is a single option in a categorical scale.

type ScaleType ¶

// ScaleType is a string-backed identifier for the kind of scoring scale a
// category uses. Its value is what appears in the serialized "type" field
// of a Scale.
type ScaleType string

ScaleType defines the type of scoring scale.

// Defined ScaleType values. The string literals are the serialized form.
const (
	// ScaleTypeCategorical uses discrete categories (pass/partial/fail).
	// Recommended for LLM-as-Judge - better calibrated than numeric scales.
	ScaleTypeCategorical ScaleType = "categorical"

	// ScaleTypeChecklist uses a list of required/optional items.
	ScaleTypeChecklist ScaleType = "checklist"

	// ScaleTypeBinary is simple pass/fail.
	ScaleTypeBinary ScaleType = "binary"

	// ScaleTypeLikert uses a numeric scale (e.g., 1-5).
	// Better for human comparison and inter-rater reliability studies.
	// Scores are mapped to categorical (pass/partial/fail) for decisions.
	ScaleTypeLikert ScaleType = "likert"
)

type ScoreThresholds ¶ added in v0.10.0

// ScoreThresholds holds the numeric cutoffs (0-100) used by weighted-score
// rubrics to turn an overall score into pass, partial, or fail.
type ScoreThresholds struct {
	// Pass is the minimum score that passes; Partial is the minimum score
	// that counts as partial. A score below Partial fails.
	// Both are plain ints and always serialized; the type itself does not
	// enforce the 0-100 range or that Partial <= Pass — presumably checked
	// by validation elsewhere (TODO confirm).
	Pass    int `json:"pass" yaml:"pass"`
	Partial int `json:"partial" yaml:"partial"`
}

ScoreThresholds are numeric pass/partial cutoffs (0-100) for weighted-score rubrics. A score at or above Pass passes; at or above Partial is partial; below Partial fails.

type ScoreValue ¶

// ScoreValue is a string-backed categorical score.
type ScoreValue string

ScoreValue represents a categorical score value.

// Defined ScoreValue outcomes. The string literals ("pass", "partial",
// "fail") are the values that appear in serialized data.
const (
	ScorePass    ScoreValue = "pass"
	ScorePartial ScoreValue = "partial"
	ScoreFail    ScoreValue = "fail"
)

func LikertToCategorical ¶

func LikertToCategorical(score int, config *LikertConfig) ScoreValue

LikertToCategorical converts a Likert score to categorical (pass/partial/fail).

func (ScoreValue) Icon ¶

func (s ScoreValue) Icon() string

Icon returns the emoji icon for the score.

func (ScoreValue) IsFailing ¶

func (s ScoreValue) IsFailing() bool

IsFailing returns true if this score is failing.

func (ScoreValue) IsPartial ¶

func (s ScoreValue) IsPartial() bool

IsPartial returns true if this score is partial.

func (ScoreValue) IsPassing ¶

func (s ScoreValue) IsPassing() bool

IsPassing returns true if this score is considered passing.

type Severity ¶

// Severity is a string-backed severity level attached to a finding.
type Severity string

Severity represents the severity level of a finding. Based on InfoSec severity classifications.

// Defined Severity levels, listed from most to least severe. Per the
// per-constant notes, critical and high block approval; the remaining
// levels do not.
const (
	SeverityCritical Severity = "critical" // Blocks approval, must fix
	SeverityHigh     Severity = "high"     // Blocks approval, must fix
	SeverityMedium   Severity = "medium"   // Should fix before approval
	SeverityLow      Severity = "low"      // Nice to fix
	SeverityInfo     Severity = "info"     // Informational only
)

func AllSeverities ¶

func AllSeverities() []Severity

AllSeverities returns all severity levels in order of severity.

func WorstSeverity ¶ added in v0.11.0

func WorstSeverity(findings []Finding) Severity

WorstSeverity returns the highest-weight severity among findings, or the zero value if findings is empty. Used to roll a category's findings up into a single severity for prioritization (e.g. CategoryResult.Severity).

func (Severity) Icon ¶

func (s Severity) Icon() string

Icon returns the emoji icon for the severity.

func (Severity) IsBlocking ¶

func (s Severity) IsBlocking() bool

IsBlocking returns true if this severity blocks approval.

func (Severity) Weight ¶

func (s Severity) Weight() int

Weight returns a numeric weight for sorting (higher = more severe).

type TokenUsage ¶

// TokenUsage records token counts for a single evaluation.
//
// NOTE(review): the JSON keys here are snake_case (input_tokens, ...),
// whereas report types such as ReportMetadata use camelCase. Presumably
// this mirrors an upstream provider's wire format — confirm before
// changing, since renaming the keys would break existing serialized data.
type TokenUsage struct {
	// InputTokens is the number of input/prompt tokens.
	InputTokens int `json:"input_tokens"`

	// OutputTokens is the number of output/completion tokens.
	OutputTokens int `json:"output_tokens"`

	// TotalTokens is the total tokens used.
	// It is stored independently; nothing in this type derives it from
	// InputTokens and OutputTokens.
	TotalTokens int `json:"total_tokens"`

	// CacheReadTokens is tokens read from cache (if applicable).
	// Omitted from JSON when zero.
	CacheReadTokens int `json:"cache_read_tokens,omitempty"`

	// CacheWriteTokens is tokens written to cache (if applicable).
	// Omitted from JSON when zero.
	CacheWriteTokens int `json:"cache_write_tokens,omitempty"`
}

TokenUsage tracks token consumption for an evaluation.

type ValidationIssue ¶ added in v0.7.0

// ValidationIssue describes one problem found during validation: where it
// is, a machine-readable code, a message, and how serious it is.
// Path, Code, Message, and Severity are always serialized; ActualValue and
// AllowedValues only when non-empty.
type ValidationIssue struct {
	// Path is the JSON path to the problematic field (e.g., "categories[0].score").
	Path string `json:"path"`

	// Code is a machine-readable error code.
	Code string `json:"code"`

	// Message describes the issue.
	Message string `json:"message"`

	// Severity indicates how serious the issue is.
	Severity ValidationSeverity `json:"severity"`

	// ActualValue is the invalid value found.
	// It is carried as a string regardless of the offending field's type.
	ActualValue string `json:"actualValue,omitempty"`

	// AllowedValues lists valid options (for enum violations).
	AllowedValues []string `json:"allowedValues,omitempty"`
}

ValidationIssue represents a single validation problem.

type ValidationResult ¶ added in v0.7.0

// ValidationResult aggregates the outcome of a validation run: an overall
// validity flag, the individual issues, and per-severity counts.
// All fields are always serialized.
type ValidationResult struct {
	// Valid is true if no errors were found (warnings allowed).
	Valid bool `json:"valid"`

	// Issues contains all validation problems found.
	// No omitempty: a nil slice is encoded as JSON null.
	Issues []ValidationIssue `json:"issues"`

	// ErrorCount is the number of error-level issues.
	ErrorCount int `json:"errorCount"`

	// WarningCount is the number of warning-level issues.
	WarningCount int `json:"warningCount"`
}

ValidationResult contains all validation issues found.

func ValidateReport ¶ added in v0.7.0

func ValidateReport(r *Rubric) *ValidationResult

ValidateReport validates a rubric evaluation report for correctness. It checks that:

- All enum values are valid (scores, severities, decision status)
- Required fields are present
- Counts are accurate
- The decision is consistent with the findings and categories

func ValidateRubricSetV2 ¶ added in v0.7.0

func ValidateRubricSetV2(rs *RubricSet) *ValidationResult

ValidateRubricSetV2 validates a rubric definition (not a report). It returns a *ValidationResult instead of the []string returned by RubricSet.Validate, for consistency with ValidateReport.

func (*ValidationResult) HasErrors ¶ added in v0.7.0

func (r *ValidationResult) HasErrors() bool

HasErrors returns true if there are any error-level issues.

func (*ValidationResult) HasWarnings ¶ added in v0.7.0

func (r *ValidationResult) HasWarnings() bool

HasWarnings returns true if there are any warning-level issues.

func (*ValidationResult) String ¶ added in v0.7.0

func (r *ValidationResult) String() string

String returns a human-readable summary.

type ValidationSeverity ¶ added in v0.7.0

// ValidationSeverity is a string-backed severity level for a validation issue.
type ValidationSeverity string

ValidationSeverity indicates the severity of a validation issue.

// Defined ValidationSeverity levels. The string literals are the
// serialized form used in ValidationIssue.Severity.
const (
	// ValidationError is a fatal issue that must be fixed.
	ValidationError ValidationSeverity = "error"

	// ValidationWarning is a non-fatal issue that should be fixed.
	ValidationWarning ValidationSeverity = "warning"
)

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL

Documentation ¶

Overview ¶

Index ¶

Constants ¶

Variables ¶

Functions ¶

func AllReasonCodeCategories ¶ added in v0.9.0

func AllRequiredPassing ¶

func CountFindingsByCode ¶ added in v0.9.0

func GetRepairPrompt ¶ added in v0.9.0

func RequiresHumanReview ¶ added in v0.9.0

func ValidDecisionStatusValues ¶ added in v0.7.0

func ValidEvaluationTypeValues ¶ added in v0.7.0

func ValidIntegerScoreValues ¶ added in v0.9.0

func ValidReasonCodes ¶ added in v0.9.0

func ValidScaleTypeValues ¶ added in v0.7.0

func ValidScoreValues ¶ added in v0.7.0

func ValidSeverityValues ¶ added in v0.7.0

Types ¶

type ActionItem ¶

type AggregationMethod ¶

type Annotation ¶

type CategoricalAgreement ¶

func ComputeCategoricalAgreement ¶

type Category ¶

func NewCategory ¶

func (*Category) AddOption ¶

func (*Category) GetOptionForValue ¶

func (*Category) IsComposite ¶ added in v0.10.0

func (*Category) SetEvaluationPrompt ¶

func (*Category) SetExamples ¶

func (*Category) SetRequired ¶

func (*Category) SetWeight ¶

func (*Category) WithBinary ¶

func (*Category) WithChecklist ¶

func (*Category) WithLikert ¶

func (*Category) WithLikert5 ¶

func (*Category) WithPassPartialFail ¶

type CategoryExamples ¶

type CategoryResult ¶

func NewCategoryResult ¶

func NewCategoryResultFromLikert ¶

func NewCategoryResultWithIntScore ¶ added in v0.9.0

func NewCategoryResultWithNumeric ¶

func (*CategoryResult) AddEvidence ¶

func (*CategoryResult) AddFinding ¶

func (*CategoryResult) AddReasonCode ¶ added in v0.9.0

func (*CategoryResult) AddReasonCodes ¶ added in v0.9.0

func (*CategoryResult) GetNumericScore ¶

func (*CategoryResult) HasLowConfidence ¶ added in v0.9.0

func (*CategoryResult) HasNumericScore ¶

func (*CategoryResult) IsPassing ¶

func (*CategoryResult) SetChecklistResults ¶

func (*CategoryResult) SetConfidence ¶ added in v0.9.0

func (*CategoryResult) SetIntScore ¶ added in v0.9.0

func (*CategoryResult) SetNumericScore ¶

type CategoryResultCounts ¶

func CountResults ¶

func (CategoryResultCounts) AllPassing ¶

type ChecklistResults ¶

type ChecklistThreshold ¶

type CoverageReport ¶ added in v0.9.0

func NewCoverageReport ¶ added in v0.9.0

func (*CoverageReport) AddSection ¶ added in v0.9.0

func (*CoverageReport) AllComplete ¶ added in v0.9.0

func (*CoverageReport) ComputeOverall ¶ added in v0.9.0

func (*CoverageReport) ComputeOverallWeighted ¶ added in v0.9.0

func (*CoverageReport) GetSection ¶ added in v0.9.0

func (*CoverageReport) HasSection ¶ added in v0.9.0

func (*CoverageReport) MeetsThreshold ¶ added in v0.9.0

func (*CoverageReport) SectionsAboveThreshold ¶ added in v0.9.0

func (*CoverageReport) SectionsBelowThreshold ¶ added in v0.9.0

func (*CoverageReport) SetSection ¶ added in v0.9.0

type CoverageSection ¶ added in v0.9.0

type Criterion ¶ added in v0.10.0

type CriterionLevel ¶ added in v0.10.0

type Decision ¶

func EvaluateResults ¶

type DecisionStatus ¶

type EvaluationType ¶