evaluation

package

Go to main page

Versions in this module

v0

v0.11.3

Mar 9, 2026

v0.11.2

Mar 7, 2026

v0.11.1

Mar 6, 2026

v0.11.0

Mar 6, 2026

v0.10.3

Feb 27, 2026

v0.9.31

Feb 23, 2026

v0.9.30

Feb 20, 2026

v0.9.28

Feb 18, 2026

v0.9.27

Feb 16, 2026

v0.9.26

Feb 15, 2026

v0.9.25

Feb 15, 2026

v0.9.24

Feb 15, 2026

v0.9.23

Feb 13, 2026

v0.9.22

Feb 13, 2026

v0.9.21

Feb 13, 2026

v0.9.20

Feb 11, 2026

v0.9.19

Feb 10, 2026

v0.9.18

Feb 9, 2026

v0.9.17

Feb 6, 2026

v0.9.16

Feb 5, 2026

v0.9.15

Feb 4, 2026

v0.9.14

Jan 31, 2026

v0.9.13

Jan 30, 2026

v0.9.12

Jan 28, 2026

v0.9.11

Jan 27, 2026

Changes in this version

type TestCaseResult

+ CustomScores map[string]float64

+ func EvaluateTestCaseWithDataset(ctx context.Context, testCase *TestCase, actualAnswer string, ...) (*TestCaseResult, error)

v0.9.10

Jan 26, 2026

Changes in this version

+ func GetDefaultCustomEvaluatorDir() string

+ func UnmarshalYAML(data []byte, v interface{}) error

+ type ContextRelevanceEvaluator struct

+ func NewContextRelevanceEvaluator(llmClient llm.Client) *ContextRelevanceEvaluator

+ func (e *ContextRelevanceEvaluator) Evaluate(ctx context.Context, testCase *TestCase, actual string, ...) (float64, error)

+ func (e *ContextRelevanceEvaluator) Name() string

+ type CustomEvaluator struct

+ func NewCustomEvaluator(def *CustomEvaluatorDef, llmClient llm.Client) *CustomEvaluator

+ func (e *CustomEvaluator) Evaluate(ctx context.Context, testCase *TestCase, actual string, ...) (float64, error)

+ func (e *CustomEvaluator) Name() string

+ type CustomEvaluatorDef struct

+ Author string

+ Description string

+ Model *ModelConfig

+ Name string

+ Prompt string

+ Required bool

+ Scoring ScoringConfig

+ Tags []string

+ Version string

+ func (d *CustomEvaluatorDef) Validate() error

+ type CustomEvaluatorLoader struct

+ func NewCustomEvaluatorLoader(baseDir string) *CustomEvaluatorLoader

+ func (l *CustomEvaluatorLoader) Load(name string) (*CustomEvaluatorDef, error)

+ func (l *CustomEvaluatorLoader) LoadAll() ([]*CustomEvaluatorDef, error)

type Dataset

+ CustomEvaluators []string

type EvaluationSummary

+ AvgContextRelevance float64

+ AvgFaithfulness float64

+ type EvaluatorProvider interface

+ GetAccuracyEvaluator func() Evaluator

+ GetContextRelevanceEvaluator func() Evaluator

+ GetFaithfulnessEvaluator func() Evaluator

+ GetHallucinationEvaluator func() Evaluator

+ IsAvailable func(ctx context.Context) bool

+ Name func() string

+ func CreateProvider(ctx context.Context, providerType ProviderType, llmClient llm.Client) (EvaluatorProvider, error)

+ type FaithfulnessEvaluator struct

+ func NewFaithfulnessEvaluator(llmClient llm.Client) *FaithfulnessEvaluator

+ func (e *FaithfulnessEvaluator) Evaluate(ctx context.Context, testCase *TestCase, actual string, ...) (float64, error)

+ func (e *FaithfulnessEvaluator) Name() string

+ type LocalProvider struct

+ func NewLocalProvider(llmClient llm.Client) *LocalProvider

+ func (p *LocalProvider) GetAccuracyEvaluator() Evaluator

+ func (p *LocalProvider) GetContextRelevanceEvaluator() Evaluator

+ func (p *LocalProvider) GetFaithfulnessEvaluator() Evaluator

+ func (p *LocalProvider) GetHallucinationEvaluator() Evaluator

+ func (p *LocalProvider) IsAvailable(ctx context.Context) bool

+ func (p *LocalProvider) Name() string

+ type ModelConfig struct

+ Name string

+ Provider string

+ Temperature float64

+ type OpikAccuracyEvaluator struct

+ func (e *OpikAccuracyEvaluator) Evaluate(ctx context.Context, testCase *TestCase, actual string, ...) (float64, error)

+ func (e *OpikAccuracyEvaluator) Name() string

+ type OpikContextRelevanceEvaluator struct

+ func (e *OpikContextRelevanceEvaluator) Evaluate(ctx context.Context, testCase *TestCase, actual string, ...) (float64, error)

+ func (e *OpikContextRelevanceEvaluator) Name() string

+ type OpikFaithfulnessEvaluator struct

+ func (e *OpikFaithfulnessEvaluator) Evaluate(ctx context.Context, testCase *TestCase, actual string, ...) (float64, error)

+ func (e *OpikFaithfulnessEvaluator) Name() string

+ type OpikHallucinationEvaluator struct

+ func (e *OpikHallucinationEvaluator) Evaluate(ctx context.Context, testCase *TestCase, actual string, ...) (float64, error)

+ func (e *OpikHallucinationEvaluator) Name() string

+ type OpikProvider struct

+ func NewOpikProvider(config *llm.OpikConfig, llmClient llm.Client) (*OpikProvider, error)

+ func (p *OpikProvider) GetAccuracyEvaluator() Evaluator

+ func (p *OpikProvider) GetContextRelevanceEvaluator() Evaluator

+ func (p *OpikProvider) GetFaithfulnessEvaluator() Evaluator

+ func (p *OpikProvider) GetHallucinationEvaluator() Evaluator

+ func (p *OpikProvider) IsAvailable(ctx context.Context) bool

+ func (p *OpikProvider) Name() string

+ func (p *OpikProvider) Shutdown(ctx context.Context) error

+ type ProviderType string

+ const ProviderTypeLocal

+ const ProviderTypeOpik

+ func GetAvailableProviders(ctx context.Context, llmClient llm.Client) []ProviderType

+ func GetDefaultProvider() ProviderType

type Runner

+ func (r *Runner) RunEvaluationWithProvider(ctx context.Context, dataset *Dataset, agentName string, collection string, ...) (*EvaluationRun, error)

+ type ScoringConfig struct

+ Pattern string

+ Threshold float64

+ Type string

+ Weight float64

type TestCase

+ RetrievedContext []string

type TestCaseResult

+ ContextRelevanceScore float64

+ FaithfulnessScore float64

+ func EvaluateTestCaseWithLLMClient(ctx context.Context, testCase *TestCase, actualAnswer string, ...) (*TestCaseResult, error)

v0.9.9

Jan 23, 2026

Changes in this version

+ func GenerateRunID() string

+ func GetDefaultDatasetDir() string

+ func GetDefaultResultsDir() string

+ func SaveDataset(dataset *Dataset, filepath string) error

+ func SaveResults(run *EvaluationRun, format string) (string, error)

+ func SaveResultsToFile(run *EvaluationRun, filePath, format string) error

+ type AccuracyEvaluator struct

+ func NewAccuracyEvaluator(llmClient llm.Client) *AccuracyEvaluator

+ func (e *AccuracyEvaluator) Evaluate(ctx context.Context, testCase *TestCase, actual string, ...) (float64, error)

+ func (e *AccuracyEvaluator) Name() string

+ type CitationEvaluator struct

+ func NewCitationEvaluator() *CitationEvaluator

+ func (e *CitationEvaluator) Evaluate(ctx context.Context, testCase *TestCase, actual string, ...) (float64, error)

+ func (e *CitationEvaluator) Name() string

+ type Dataset struct

+ Author string

+ Config DatasetConfig

+ Description string

+ Name string

+ Tags []string

+ TestCases []TestCase

+ Version string

+ func ListDatasets() ([]*Dataset, error)

+ func LoadDataset(filepath string) (*Dataset, error)

+ func (d *Dataset) Validate() error

+ type DatasetConfig struct

+ AllowHallucination bool

+ DefaultAgent string

+ DefaultCollection string

+ MinAccuracyScore float64

+ MinCitationScore float64

+ type EvaluationConfig struct

+ AgentPath string

+ Parameters map[string]string

+ type EvaluationRun struct

+ AgentName string

+ Collection string

+ Config EvaluationConfig

+ DatasetName string

+ ID string

+ Results []TestCaseResult

+ Summary EvaluationSummary

+ Timestamp time.Time

+ func ListResults() ([]*EvaluationRun, error)

+ func LoadResults(runID string) (*EvaluationRun, error)

+ type EvaluationSummary struct

+ AvgAccuracy float64

+ AvgCitation float64

+ AvgHallucination float64

+ AvgTime float64

+ Duration time.Duration

+ EndTime time.Time

+ FailedTests int

+ PassRate float64

+ PassedTests int

+ StartTime time.Time

+ TotalTests int

+ TotalTime float64

+ func CalculateSummary(results []TestCaseResult, startTime, endTime time.Time) EvaluationSummary

+ type Evaluator interface

+ Evaluate func(ctx context.Context, testCase *TestCase, actual string, ...) (float64, error)

+ Name func() string

+ type HallucinationDetector struct

+ func NewHallucinationDetector(llmClient llm.Client) *HallucinationDetector

+ func (e *HallucinationDetector) Evaluate(ctx context.Context, testCase *TestCase, actual string, ...) (float64, error)

+ func (e *HallucinationDetector) Name() string

+ type Runner struct

+ func NewRunner(llmClient llm.Client) *Runner

+ func (r *Runner) RunEvaluation(ctx context.Context, dataset *Dataset, agentName string, collection string) (*EvaluationRun, error)

+ type TestCase struct

+ Collection string

+ Description string

+ ExpectedAnswer string

+ ExpectedCitations []string

+ ID string

+ MinRelevanceScore float64

+ MustCite bool

+ Query string

+ RequiredConcepts []string

+ Tags []string

+ func (tc *TestCase) Validate() error

+ type TestCaseResult struct

+ AccuracyScore float64

+ ActualAnswer string

+ ActualCitations []string

+ CitationScore float64

+ Details map[string]interface{}

+ Errors []string

+ HallucinationScore float64

+ Passed bool

+ Query string

+ ResponseTime float64

+ TestCaseID string

+ func EvaluateTestCase(ctx context.Context, testCase *TestCase, actualAnswer string, ...) (*TestCaseResult, error)

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL