eval

package
v0.5.0 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: May 26, 2026 License: MIT Imports: 10 Imported by: 0

Documentation

Overview

Package eval provides evaluation orchestration for spec documents.

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type CategoryResult

// CategoryResult contains the evaluation result for a category.
//
// Every field is serialized to JSON under the lowercase key given in its
// struct tag. Values of this type appear in Result.Categories.
// NOTE(review): Weight is presumably the category's relative contribution to
// the overall score — confirm against the rubric definition.
type CategoryResult struct {
	ID          string  `json:"id"`
	Name        string  `json:"name"`
	Score       float64 `json:"score"`
	Weight      float64 `json:"weight"`
	Explanation string  `json:"explanation"`
}

CategoryResult contains the evaluation result for a category.

type Evaluator

// Evaluator performs evaluations using an LLM judge.
//
// It exposes no public fields; obtain one from NewEvaluator, which takes the
// *LLMClient to use.
type Evaluator struct {
	// contains filtered or unexported fields
}

Evaluator performs evaluations using an LLM judge.

func NewEvaluator

func NewEvaluator(llm *LLMClient) *Evaluator

NewEvaluator creates a new evaluator with the given LLM client.

func (*Evaluator) Evaluate

func (e *Evaluator) Evaluate(ctx context.Context, specType types.SpecType, content string) (*Result, error)

Evaluate runs evaluation on content against the rubric for the given spec type.

func (*Evaluator) SetRubricLoader

func (e *Evaluator) SetRubricLoader(loader rubrics.Loader)

SetRubricLoader sets a custom rubric loader for evaluation.

type Finding

// Finding represents an issue found during evaluation.
//
// All fields are strings serialized under the lowercase JSON keys in their
// tags. Evidence is tagged omitempty, so it is left out of the JSON output
// when it is the empty string; every other field is always emitted.
// NOTE(review): the set of valid Severity values is not visible here —
// confirm against the rubric or report schema.
type Finding struct {
	Severity       string `json:"severity"`
	Category       string `json:"category"`
	Title          string `json:"title"`
	Description    string `json:"description"`
	Recommendation string `json:"recommendation"`
	Evidence       string `json:"evidence,omitempty"`
}

Finding represents an issue found during evaluation.

type JudgeMetadata

// JudgeMetadata records information about the LLM judge.
//
// It is the second return value of LLMClient.Complete and is carried in
// Result under the "judge" JSON key.
// NOTE(review): Tokens is presumably the token count for the request —
// whether it covers prompt, completion, or both is not visible here.
type JudgeMetadata struct {
	Model       string  `json:"model"`
	Provider    string  `json:"provider"`
	Temperature float64 `json:"temperature"`
	Tokens      int     `json:"tokens"`
}

JudgeMetadata records information about the LLM judge.

type LLMClient

// LLMClient wraps omnillm for evaluation requests.
//
// It exposes no public fields; create one with NewLLMClient,
// NewLLMClientFromEnv, or NewLLMClientFromProject, and call Close to release
// its resources.
type LLMClient struct {
	// contains filtered or unexported fields
}

LLMClient wraps omnillm for evaluation requests.

func NewLLMClient

func NewLLMClient(cfg LLMConfig) (*LLMClient, error)

NewLLMClient creates a new LLM client with the given configuration.

func NewLLMClientFromEnv

func NewLLMClientFromEnv() (*LLMClient, error)

NewLLMClientFromEnv creates an LLM client using environment configuration. It tries providers in order: ANTHROPIC, OPENAI, GEMINI.

func NewLLMClientFromProject

func NewLLMClientFromProject(projectCfg *types.LLMConfig) (*LLMClient, error)

NewLLMClientFromProject creates an LLM client using project configuration. Project config values take precedence; missing values fall back to environment defaults.

func (*LLMClient) Close

func (c *LLMClient) Close() error

Close releases resources.

func (*LLMClient) Complete

func (c *LLMClient) Complete(ctx context.Context, prompt string) (string, JudgeMetadata, error)

Complete sends a prompt to the LLM and returns the response.

type LLMConfig

// LLMConfig configures the LLM client.
//
// It is the argument to NewLLMClient; DefaultLLMConfig returns a value with
// the evaluation defaults filled in. The struct carries no JSON tags.
type LLMConfig struct {
	Provider    string  // Provider name (openai, anthropic, gemini, etc.)
	Model       string  // Model name
	APIKey      string  // API key (optional if env var is set)
	Temperature float64 // Temperature for generation (default 0.0 for deterministic)
	MaxTokens   int     // Max tokens for response (default 4096)
}

LLMConfig configures the LLM client.

func DefaultLLMConfig

func DefaultLLMConfig() LLMConfig

DefaultLLMConfig returns default configuration for evaluation.

type Result

// Result represents the outcome of an evaluation.
//
// Evaluator.Evaluate returns a *Result, and ToEvaluationReport converts it to
// a structured-evaluation report. Fields are serialized under the snake_case
// JSON keys in their tags; none use omitempty, so nil Categories or Findings
// slices encode as JSON null.
// NOTE(review): the rule relating Score to Passed and the allowed Decision
// values are not visible here — confirm against the evaluator implementation.
type Result struct {
	SpecType   types.SpecType   `json:"spec_type"`
	Timestamp  time.Time        `json:"timestamp"`
	Score      float64          `json:"score"`
	Passed     bool             `json:"passed"`
	Categories []CategoryResult `json:"categories"`
	Findings   []Finding        `json:"findings"`
	Decision   string           `json:"decision"`
	Summary    string           `json:"summary"`
	Judge      JudgeMetadata    `json:"judge"`
}

Result represents the outcome of an evaluation.

func (*Result) ToEvaluationReport

func (r *Result) ToEvaluationReport(rubricSet *rubrics.RubricSet) *evaluation.EvaluationReport

ToEvaluationReport converts the result to a structured-evaluation report. The rubricSet parameter is required for finalization.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL