Versions in this module Expand all Collapse all v0 v0.9.9 Jan 23, 2026 Changes in this version + func GenerateRunID() string + func GetDefaultDatasetDir() string + func GetDefaultResultsDir() string + func SaveDataset(dataset *Dataset, filepath string) error + func SaveResults(run *EvaluationRun, format string) (string, error) + func SaveResultsToFile(run *EvaluationRun, filePath, format string) error + type AccuracyEvaluator struct + func NewAccuracyEvaluator(llmClient llm.Client) *AccuracyEvaluator + func (e *AccuracyEvaluator) Evaluate(ctx context.Context, testCase *TestCase, actual string, ...) (float64, error) + func (e *AccuracyEvaluator) Name() string + type CitationEvaluator struct + func NewCitationEvaluator() *CitationEvaluator + func (e *CitationEvaluator) Evaluate(ctx context.Context, testCase *TestCase, actual string, ...) (float64, error) + func (e *CitationEvaluator) Name() string + type Dataset struct + Author string + Config DatasetConfig + Description string + Name string + Tags []string + TestCases []TestCase + Version string + func ListDatasets() ([]*Dataset, error) + func LoadDataset(filepath string) (*Dataset, error) + func (d *Dataset) Validate() error + type DatasetConfig struct + AllowHallucination bool + DefaultAgent string + DefaultCollection string + MinAccuracyScore float64 + MinCitationScore float64 + type EvaluationConfig struct + AgentPath string + Parameters map[string]string + type EvaluationRun struct + AgentName string + Collection string + Config EvaluationConfig + DatasetName string + ID string + Results []TestCaseResult + Summary EvaluationSummary + Timestamp time.Time + func ListResults() ([]*EvaluationRun, error) + func LoadResults(runID string) (*EvaluationRun, error) + type EvaluationSummary struct + AvgAccuracy float64 + AvgCitation float64 + AvgHallucination float64 + AvgTime float64 + Duration time.Duration + EndTime time.Time + FailedTests int + PassRate float64 + PassedTests int + StartTime time.Time + TotalTests int + TotalTime float64 + func CalculateSummary(results []TestCaseResult, startTime, endTime time.Time) EvaluationSummary + type Evaluator interface + Evaluate func(ctx context.Context, testCase *TestCase, actual string, ...) (float64, error) + Name func() string + type HallucinationDetector struct + func NewHallucinationDetector(llmClient llm.Client) *HallucinationDetector + func (e *HallucinationDetector) Evaluate(ctx context.Context, testCase *TestCase, actual string, ...) (float64, error) + func (e *HallucinationDetector) Name() string + type Runner struct + func NewRunner(llmClient llm.Client) *Runner + func (r *Runner) RunEvaluation(ctx context.Context, dataset *Dataset, agentName string, collection string) (*EvaluationRun, error) + type TestCase struct + Collection string + Description string + ExpectedAnswer string + ExpectedCitations []string + ID string + MinRelevanceScore float64 + MustCite bool + Query string + RequiredConcepts []string + Tags []string + func (tc *TestCase) Validate() error + type TestCaseResult struct + AccuracyScore float64 + ActualAnswer string + ActualCitations []string + CitationScore float64 + Details map[string]interface{} + Errors []string + HallucinationScore float64 + Passed bool + Query string + ResponseTime float64 + TestCaseID string + func EvaluateTestCase(ctx context.Context, testCase *TestCase, actualAnswer string, ...) (*TestCaseResult, error)