eval

package

Version: v0.7.0 (latest) | Published: Jun 1, 2026 | License: MIT | Imports: 14 | Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/ProductBuildersHQ/visionspec

Links

Open Source Insights

Documentation ¶

Overview ¶

Package eval provides evaluation orchestration for spec documents.

Index ¶

func ClaimsFromCategoryResults(categories []CategoryResult, timestamp time.Time) []claims.Claim
func CreateMultiEvalSummary(project string, version string, results map[string]*Result, ...) *summary.SummaryReport
func CreateSingleEvalSummary(project string, specType string, result *Result, evalReport *rubric.Rubric, ...) *summary.SummaryReport
func RenderEvaluationReportMarkdown(w io.Writer, report *rubric.Rubric) error
type CategoryResult
type EvalSummary
- func NewEvalSummary(project, version string) *EvalSummary
- func (s *EvalSummary) AddResult(specType string, result *Result, evalReport *rubric.Rubric, ...)
- func (s *EvalSummary) IsAllPassing() bool
- func (s *EvalSummary) ToSummaryReport(phase string) *summary.SummaryReport
- func (s *EvalSummary) TotalScore() float64
type Evaluator
- func NewEvaluator(llm *LLMClient) *Evaluator
- func (e *Evaluator) Evaluate(ctx context.Context, specType types.SpecType, content string) (*Result, error)
- func (e *Evaluator) SetRubricLoader(loader rubrics.Loader)
type Finding
type JudgeMetadata
type LLMClient
- func NewLLMClient(cfg LLMConfig) (*LLMClient, error)
- func NewLLMClientFromEnv() (*LLMClient, error)
- func NewLLMClientFromProject(projectCfg *types.LLMConfig) (*LLMClient, error)
- func (c *LLMClient) Close() error
- func (c *LLMClient) Complete(ctx context.Context, prompt string) (string, JudgeMetadata, error)
type LLMConfig
- func DefaultLLMConfig() LLMConfig
type MarkdownRenderer
- func NewMarkdownRenderer() *MarkdownRenderer
- func (r *MarkdownRenderer) Render(w io.Writer, result *Result) error
type Renderer
type Result
- func (r *Result) ToClaimsReport(document string) *claims.ClaimsReport
- func (r *Result) ToEvaluationReport(rubricSet *rubrics.RubricSet) *rubric.Rubric
type TerminalRenderer
- func NewTerminalRenderer(verbose bool) *TerminalRenderer
- func (r *TerminalRenderer) Render(w io.Writer, result *Result) error

Constants ¶

This section is empty.

Variables ¶

This section is empty.

Functions ¶

func ClaimsFromCategoryResults ¶ added in v0.7.0

func ClaimsFromCategoryResults(categories []CategoryResult, timestamp time.Time) []claims.Claim

ClaimsFromCategoryResults creates claims from category results.

func CreateMultiEvalSummary ¶ added in v0.7.0

func CreateMultiEvalSummary(
	project string,
	version string,
	results map[string]*Result,
	evalReports map[string]*rubric.Rubric,
	claimsReports map[string]*claims.ClaimsReport,
) *summary.SummaryReport

CreateMultiEvalSummary creates a summary with multiple evaluations.

func CreateSingleEvalSummary ¶ added in v0.7.0

func CreateSingleEvalSummary(
	project string,
	specType string,
	result *Result,
	evalReport *rubric.Rubric,
	claimsReport *claims.ClaimsReport,
) *summary.SummaryReport

CreateSingleEvalSummary creates a summary with a single evaluation.

func RenderEvaluationReportMarkdown ¶ added in v0.7.0

func RenderEvaluationReportMarkdown(w io.Writer, report *rubric.Rubric) error

RenderEvaluationReportMarkdown renders a structured-evaluation report to markdown.

Types ¶

type CategoryResult ¶

type CategoryResult struct {
	ID          string  `json:"id"`
	Name        string  `json:"name"`
	Score       float64 `json:"score"`
	Weight      float64 `json:"weight"`
	Explanation string  `json:"explanation"`
}

CategoryResult contains the evaluation result for a category.

type EvalSummary ¶ added in v0.7.0

type EvalSummary struct {
	// Project is the project name.
	Project string

	// Version is the project version.
	Version string

	// Results are the individual evaluation results keyed by spec type.
	Results map[string]*Result

	// EvaluationReports are the structured evaluation reports.
	EvaluationReports map[string]*rubric.Rubric

	// ClaimsReports are the claims extracted from findings.
	ClaimsReports map[string]*claims.ClaimsReport
}

EvalSummary aggregates multiple evaluation results with embedded reports.

func NewEvalSummary ¶ added in v0.7.0

func NewEvalSummary(project, version string) *EvalSummary

NewEvalSummary creates a new evaluation summary.

func (*EvalSummary) AddResult ¶ added in v0.7.0

func (s *EvalSummary) AddResult(specType string, result *Result, evalReport *rubric.Rubric, claimsReport *claims.ClaimsReport)

AddResult adds an evaluation result to the summary.

func (*EvalSummary) IsAllPassing ¶ added in v0.7.0

func (s *EvalSummary) IsAllPassing() bool

IsAllPassing returns true if all evaluations passed.

func (*EvalSummary) ToSummaryReport ¶ added in v0.7.0

func (s *EvalSummary) ToSummaryReport(phase string) *summary.SummaryReport

ToSummaryReport converts the summary to a structured-evaluation SummaryReport. The report embeds the full-fidelity evaluation reports and claims reports.

func (*EvalSummary) TotalScore ¶ added in v0.7.0

func (s *EvalSummary) TotalScore() float64

TotalScore returns the average score across all evaluations.

type Evaluator ¶

type Evaluator struct {
	// contains filtered or unexported fields
}

Evaluator performs evaluations using an LLM judge.

func NewEvaluator ¶

func NewEvaluator(llm *LLMClient) *Evaluator

NewEvaluator creates a new evaluator with the given LLM client.

func (*Evaluator) Evaluate ¶

func (e *Evaluator) Evaluate(ctx context.Context, specType types.SpecType, content string) (*Result, error)

Evaluate runs evaluation on content against the rubric for the given spec type.

func (*Evaluator) SetRubricLoader ¶

func (e *Evaluator) SetRubricLoader(loader rubrics.Loader)

SetRubricLoader sets a custom rubric loader for evaluation.

type Finding ¶

type Finding struct {
	Severity       string `json:"severity"`
	Category       string `json:"category"`
	Title          string `json:"title"`
	Description    string `json:"description"`
	Recommendation string `json:"recommendation"`
	Evidence       string `json:"evidence,omitempty"`
}

Finding represents an issue found during evaluation.

type JudgeMetadata ¶

type JudgeMetadata struct {
	Model       string  `json:"model"`
	Provider    string  `json:"provider"`
	Temperature float64 `json:"temperature"`
	Tokens      int     `json:"tokens"`
}

JudgeMetadata records information about the LLM judge.

type LLMClient ¶

type LLMClient struct {
	// contains filtered or unexported fields
}

LLMClient wraps omnillm for evaluation requests.

func NewLLMClient ¶

func NewLLMClient(cfg LLMConfig) (*LLMClient, error)

NewLLMClient creates a new LLM client with the given configuration.

func NewLLMClientFromEnv ¶

func NewLLMClientFromEnv() (*LLMClient, error)

NewLLMClientFromEnv creates an LLM client using environment configuration. It tries providers in order: ANTHROPIC, OPENAI, GEMINI.

func NewLLMClientFromProject ¶

func NewLLMClientFromProject(projectCfg *types.LLMConfig) (*LLMClient, error)

NewLLMClientFromProject creates an LLM client using project configuration. Project config values take precedence; missing values fall back to environment defaults.

func (*LLMClient) Close ¶

func (c *LLMClient) Close() error

Close releases resources.

func (*LLMClient) Complete ¶

func (c *LLMClient) Complete(ctx context.Context, prompt string) (string, JudgeMetadata, error)

Complete sends a prompt to the LLM and returns the response.

type LLMConfig ¶

type LLMConfig struct {
	Provider    string  // Provider name (openai, anthropic, gemini, etc.)
	Model       string  // Model name
	APIKey      string  // API key (optional if env var is set)
	Temperature float64 // Temperature for generation (default 0.0 for deterministic)
	MaxTokens   int     // Max tokens for response (default 4096)
}

LLMConfig configures the LLM client.

func DefaultLLMConfig ¶

func DefaultLLMConfig() LLMConfig

DefaultLLMConfig returns default configuration for evaluation.

type MarkdownRenderer ¶ added in v0.7.0

type MarkdownRenderer struct{}

MarkdownRenderer renders results to markdown format.

func NewMarkdownRenderer ¶ added in v0.7.0

func NewMarkdownRenderer() *MarkdownRenderer

NewMarkdownRenderer creates a new markdown renderer.

func (*MarkdownRenderer) Render ¶ added in v0.7.0

func (r *MarkdownRenderer) Render(w io.Writer, result *Result) error

Render writes the result as markdown to the writer.

type Renderer ¶ added in v0.7.0

type Renderer interface {
	Render(w io.Writer, result *Result) error
}

Renderer renders evaluation results to various formats.

type Result ¶

type Result struct {
	SpecType   types.SpecType   `json:"spec_type"`
	Timestamp  time.Time        `json:"timestamp"`
	Score      float64          `json:"score"`
	Passed     bool             `json:"passed"`
	Categories []CategoryResult `json:"categories"`
	Findings   []Finding        `json:"findings"`
	Decision   string           `json:"decision"`
	Summary    string           `json:"summary"`
	Judge      JudgeMetadata    `json:"judge"`
}

Result represents the outcome of an evaluation.

func (*Result) ToClaimsReport ¶ added in v0.7.0

func (r *Result) ToClaimsReport(document string) *claims.ClaimsReport

ToClaimsReport extracts claims from evaluation findings. Each finding becomes a claim with internal validation based on the evaluation.

func (*Result) ToEvaluationReport ¶

func (r *Result) ToEvaluationReport(rubricSet *rubrics.RubricSet) *rubric.Rubric

ToEvaluationReport converts the result to a structured-evaluation report. The rubricSet parameter is required for finalization.

type TerminalRenderer ¶ added in v0.7.0

type TerminalRenderer struct {
	Verbose bool
}

TerminalRenderer renders results for terminal output.

func NewTerminalRenderer ¶ added in v0.7.0

func NewTerminalRenderer(verbose bool) *TerminalRenderer

NewTerminalRenderer creates a new terminal renderer.

func (*TerminalRenderer) Render ¶ added in v0.7.0

func (r *TerminalRenderer) Render(w io.Writer, result *Result) error

Render writes the result to the writer, formatted for terminal output.

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL