evals

package
v1.7.0 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 11, 2025 License: Apache-2.0 Imports: 19 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func LLMRubricT

func LLMRubricT(e *EvalT, rubric, output string)

func NumEvalsOrSkip

func NumEvalsOrSkip(t *testing.T) int

func RecordScore

func RecordScore(e *EvalT, result *EvalResult)

RecordScore records the score of an eval in a JSONL file.

func Run

func Run(t *testing.T, name string, f func(e *EvalT))

Types

type Eval

// Eval exposes two language-model handles and a prompt set as public fields;
// the type also has fields that this listing does not show.
// NOTE(review): the names suggest LLM is the model under evaluation and
// GraderLLM is the model that scores output against a rubric — confirm
// against the implementation.
type Eval struct {
	LLM       llm.LanguageModel
	GraderLLM llm.LanguageModel
	Prompts   *llm.Prompts
	// contains filtered or unexported fields
}

func NewEval

func NewEval() (*Eval, error)

func NewEvalWithProvider added in v1.7.0

func NewEvalWithProvider(providerName string) (*Eval, error)

NewEvalWithProvider creates an Eval instance that uses the provider named by providerName.

func (*Eval) LLMRubric

func (e *Eval) LLMRubric(rubric, output string) (*RubricResult, error)

type EvalLogLine

// EvalLogLine has the same five fields and JSON keys as EvalResult (rubric,
// output, reasoning, score, pass) plus name, timestamp, and run_number.
// NOTE(review): presumably this is the record RecordScore writes as one line
// of its JSONL file — confirm against the implementation. Timestamp is a
// plain string; its format is not visible here.
type EvalLogLine struct {
	Name      string  `json:"name"`
	Timestamp string  `json:"timestamp"`
	RunNumber int     `json:"run_number"`
	Rubric    string  `json:"rubric"`
	Output    string  `json:"output"`
	Reasoning string  `json:"reasoning"`
	Score     float64 `json:"score"`
	Pass      bool    `json:"pass"`
}

type EvalResult

// EvalResult is the value passed to RecordScore. It pairs the Rubric and
// Output strings with a Reasoning string, a float64 Score, and a Pass flag,
// each serialized under the lowercase JSON key given in its tag.
// NOTE(review): the valid range of Score is not visible here.
type EvalResult struct {
	Rubric    string  `json:"rubric"`
	Output    string  `json:"output"`
	Reasoning string  `json:"reasoning"`
	Score     float64 `json:"score"`
	Pass      bool    `json:"pass"`
}

type EvalT

// EvalT embeds both *testing.T and *Eval, so the methods and fields of each
// are promoted and can be used directly on an EvalT value.
// It is the handle passed to the callback given to Run and accepted by
// LLMRubricT, RecordScore, and LoadThreadFromJSON.
type EvalT struct {
	*testing.T
	*Eval
}

type RubricResult

// RubricResult is the value returned by (*Eval).LLMRubric: a Reasoning
// string, a float64 Score, and a Pass flag. Unlike EvalResult it carries no
// JSON tags and does not include the rubric or output text.
type RubricResult struct {
	Reasoning string
	Score     float64
	Pass      bool
}

type ThreadExport

// ThreadExport's first six fields carry JSON keys (posts, channel, team,
// users, file_infos, files). RootPost and PostList are tagged `json:"-"`, so
// encoding/json neither reads nor writes them.
type ThreadExport struct {
	// NOTE(review): the string map keys are presumably entity IDs (post,
	// user, file) — confirm against the export format.
	Posts     map[string]*model.Post     `json:"posts"`
	Channel   *model.Channel             `json:"channel"`
	Team      *model.Team                `json:"team"`
	Users     map[string]*model.User     `json:"users"`
	FileInfos map[string]*model.FileInfo `json:"file_infos"`
	Files     map[string][]byte          `json:"files"`

	// Helper fields not in the JSON
	RootPost *model.Post     `json:"-"`
	PostList *model.PostList `json:"-"`
}

ThreadExport represents the format of exported thread data.

func LoadThreadFromJSON

func LoadThreadFromJSON(t *EvalT, path string) *ThreadExport

LoadThreadFromJSON loads post data from a JSON file containing exported Mattermost thread data and returns it as a ThreadExport containing Posts, RootPost, and PostList for testing.

func (*ThreadExport) LatestPost

func (t *ThreadExport) LatestPost() *model.Post

func (*ThreadExport) RequestingUser

func (t *ThreadExport) RequestingUser() *model.User

func (*ThreadExport) String

func (t *ThreadExport) String() string

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL