evals

package

v1.12.0 Latest Latest Go to latest Published: Mar 6, 2026 License: Apache-2.0 Imports: 19 Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/mattermost/mattermost-plugin-ai

Links

Open Source Insights

Documentation ¶

Index ¶

Constants
func LLMRubricT(e *EvalT, rubric, output string)
func NumEvalsOrSkip(t *testing.T) int
func RecordScore(e *EvalT, result *EvalResult)
func Run(t *testing.T, name string, f func(e *EvalT))
type Eval
- func NewEval() (*Eval, error)
- func NewEvalWithProvider(providerName string) (*Eval, error)
- func (e *Eval) LLMRubric(rubric, output string) (*RubricResult, error)
type EvalLogLine
type EvalResult
type EvalT
type RubricResult
type ThreadExport
- func LoadThreadFromJSON(t *EvalT, path string) *ThreadExport

Constants ¶

View Source

// Default model identifiers, one per supported provider.
// Update these when bumping model versions.
const (
	// DefaultOpenAIModel is the default model name for the OpenAI provider.
	DefaultOpenAIModel    = "gpt-5.2"
	// DefaultAnthropicModel is the default model name for the Anthropic provider.
	DefaultAnthropicModel = "claude-sonnet-4-6"
	// DefaultAzureModel is the default model name for the Azure provider; it
	// currently has the same value as DefaultOpenAIModel.
	DefaultAzureModel     = "gpt-5.2"
	// DefaultMistralModel is the default model name for the Mistral provider.
	DefaultMistralModel   = "mistral-large-latest"
	// DefaultBedrockModel is the default model identifier for the Bedrock provider.
	DefaultBedrockModel   = "global.anthropic.claude-sonnet-4-6-v1:0"
)

Default models for each provider. Update these when bumping model versions.

Variables ¶

This section is empty.

Functions ¶

func LLMRubricT ¶

func LLMRubricT(e *EvalT, rubric, output string)

func NumEvalsOrSkip ¶

func NumEvalsOrSkip(t *testing.T) int

func RecordScore ¶

func RecordScore(e *EvalT, result *EvalResult)

RecordScore records the score of an eval in a JSONL file.

func Run ¶

func Run(t *testing.T, name string, f func(e *EvalT))

Types ¶

type Eval ¶

// Eval bundles the language models and prompts used by an evaluation run.
// Instances are created with NewEval or NewEvalWithProvider.
type Eval struct {
	// LLM is presumably the model under evaluation — TODO confirm against the source.
	LLM       llm.LanguageModel
	// GraderLLM is presumably the model used to grade outputs (e.g. by LLMRubric) — TODO confirm.
	GraderLLM llm.LanguageModel
	// Prompts holds the prompt set made available to evals.
	Prompts   *llm.Prompts
	// contains filtered or unexported fields
}

func NewEval ¶

func NewEval() (*Eval, error)

func NewEvalWithProvider ¶ added in v1.7.0

func NewEvalWithProvider(providerName string) (*Eval, error)

NewEvalWithProvider creates an Eval instance that uses the provider named by providerName.

func (*Eval) LLMRubric ¶

func (e *Eval) LLMRubric(rubric, output string) (*RubricResult, error)

type EvalLogLine ¶

// EvalLogLine is a JSON-serializable record of a single eval outcome. It
// carries the same rubric/output/reasoning/score/pass fields as EvalResult,
// plus the eval name, a timestamp and a run number.
// NOTE(review): presumably this is the line format RecordScore writes to its
// JSONL file — confirm against the implementation.
type EvalLogLine struct {
	// Name identifies the eval.
	Name      string  `json:"name"`
	// Timestamp is stored as a string; the exact format is not visible here.
	Timestamp string  `json:"timestamp"`
	// RunNumber distinguishes runs of the same eval — presumably related to
	// the count returned by NumEvalsOrSkip; verify.
	RunNumber int     `json:"run_number"`
	// Rubric is the rubric text the output was graded against.
	Rubric    string  `json:"rubric"`
	// Output is the text that was graded.
	Output    string  `json:"output"`
	// Reasoning is the explanation accompanying the score.
	Reasoning string  `json:"reasoning"`
	// Score is the numeric grade; its range is not visible here.
	Score     float64 `json:"score"`
	// Pass reports whether the eval passed.
	Pass      bool    `json:"pass"`
}

type EvalResult ¶

// EvalResult is the outcome of grading one output against one rubric. It is
// the value accepted by RecordScore.
type EvalResult struct {
	// Rubric is the rubric text the output was graded against.
	Rubric    string  `json:"rubric"`
	// Output is the text that was graded.
	Output    string  `json:"output"`
	// Reasoning is the explanation accompanying the score.
	Reasoning string  `json:"reasoning"`
	// Score is the numeric grade; its range is not visible here.
	Score     float64 `json:"score"`
	// Pass reports whether the eval passed.
	Pass      bool    `json:"pass"`
}

type EvalT ¶

// EvalT is the handle passed to eval callbacks (see Run) and to helpers such
// as LLMRubricT, RecordScore and LoadThreadFromJSON. It embeds both
// *testing.T and *Eval, so the exported methods and fields of each are
// promoted and can be used directly on an *EvalT.
type EvalT struct {
	*testing.T
	*Eval
}

type RubricResult ¶

// RubricResult is the value returned by (*Eval).LLMRubric.
type RubricResult struct {
	// Reasoning is the explanation accompanying the score.
	Reasoning string
	// Score is the numeric grade; its range is not visible here.
	Score     float64
	// Pass reports whether the output satisfied the rubric.
	Pass      bool
}

type ThreadExport ¶

// ThreadExport represents the format of exported thread data.
type ThreadExport struct {
	// Posts holds the thread's posts — presumably keyed by post ID; verify.
	Posts     map[string]*model.Post     `json:"posts"`
	// Channel is the channel the thread belongs to.
	Channel   *model.Channel             `json:"channel"`
	// Team is the team the thread belongs to.
	Team      *model.Team                `json:"team"`
	// Users holds the users referenced by the export — presumably keyed by user ID; verify.
	Users     map[string]*model.User     `json:"users"`
	// FileInfos holds file metadata — presumably keyed by file ID; verify.
	FileInfos map[string]*model.FileInfo `json:"file_infos"`
	// Files holds raw file contents. NOTE(review): if decoded with
	// encoding/json, each []byte value is a base64-encoded string in the JSON.
	Files     map[string][]byte          `json:"files"`

	// Helper fields not in the JSON: the `json:"-"` tag excludes them from
	// encoding and decoding, so they must be filled in by code.
	RootPost *model.Post     `json:"-"`
	PostList *model.PostList `json:"-"`
}

ThreadExport represents the format of exported thread data.

func LoadThreadFromJSON ¶

func LoadThreadFromJSON(t *EvalT, path string) *ThreadExport

LoadThreadFromJSON loads post data from the JSON file at path, which contains exported Mattermost thread data, and returns it as a ThreadExport containing Posts, RootPost, and PostList for use in tests.

func (*ThreadExport) LatestPost ¶

func (t *ThreadExport) LatestPost() *model.Post

func (*ThreadExport) RequestingUser ¶

func (t *ThreadExport) RequestingUser() *model.User

func (*ThreadExport) String ¶

func (t *ThreadExport) String() string

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL