workspaceeval

package
v0.1.9 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: May 27, 2026 | License: Apache-2.0 | Imports: 17 | Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func AssertBehavioralTranscript

func AssertBehavioralTranscript(doc EvalDoc, transcript *sdk.ConversationStateResponse) error

func CheckEvalCatalog

func CheckEvalCatalog(evalRoot, promptRoot, templateRoot string, agentIndex map[string]AgentDoc) []string

func CheckEvidenceContractProfiles

func CheckEvidenceContractProfiles(promptRoot string, requiredProfiles []string) []string

func CheckPublicAgentsCovered

func CheckPublicAgentsCovered(evalRoot string, agentIndex map[string]AgentDoc) []string

func CheckStarterTaskCoverage

func CheckStarterTaskCoverage(evalRoot string, agentIndex map[string]AgentDoc) []string

func CollectToolSteps

func CollectToolSteps(turn *sdkapi.TurnState) []*sdkapi.ToolStepState

func DecodePayloadObject

func DecodePayloadObject(raw json.RawMessage) map[string]interface{}

func DecodePayloadText

func DecodePayloadText(raw json.RawMessage) string

func DefaultContractTests

func DefaultContractTests() []string

func DefaultRequiredEvidenceContractProfiles

func DefaultRequiredEvidenceContractProfiles() []string

func ExecuteBehavioralQuery

func ExecuteBehavioralQuery(ctx context.Context, agentlyBin, api, token, oob, agentID string, doc EvalDoc, timeoutSec int) (string, string, error)

func FirstProviderRequestPreview

func FirstProviderRequestPreview(turn *sdkapi.TurnState) string

func LoadAgents

func LoadAgents(agentRoot string) (map[string]AgentDoc, error)

func LoadEvalDocs

func LoadEvalDocs(evalRoot string) (map[string]EvalDoc, []string)

func LoadYAML

func LoadYAML(path string, dest any) error

func NewBehavioralHTTPClient

func NewBehavioralHTTPClient(ctx context.Context, api, token, oob string) (sdk.Client, error)

func ParseConversationID

func ParseConversationID(output string) string

func Run

func Run(opts Options) error

func RunBehavioralEval

func RunBehavioralEval(ctx context.Context, client sdk.Client, agentlyBin, api, token, oob string, doc EvalDoc) error

func RunBehavioralEvals

func RunBehavioralEvals(workspace, evalRoot, selected string, timeout time.Duration, api, oob, token, agentlyBin string) []string

func RunContractTests

func RunContractTests(workspace string, contractTests []string) []string

func SelectBehavioralEvals

func SelectBehavioralEvals(evals map[string]EvalDoc, selected string) []string

func StringValue

func StringValue(value interface{}) string

func TimeoutSeconds

func TimeoutSeconds(ctx context.Context) int

func Truncate

func Truncate(value string, size int) string

Types

type AgentDoc

// AgentDoc describes the subset of an agent document that this package maps
// from YAML via struct tags. It is the value type of the map returned by
// LoadAgents and of the agentIndex parameter taken by the Check* functions.
type AgentDoc struct {
	// ID is read from the "id" key.
	ID           string           `yaml:"id"`
	// StarterTasks is read from the "starterTasks" key.
	StarterTasks []StarterTaskDoc `yaml:"starterTasks"`
	// Profile is read from the "profile" key; only its "publish" flag is mapped.
	// NOTE(review): Publish presumably marks the agent as public for
	// CheckPublicAgentsCovered — confirm against the implementation.
	Profile      struct {
		Publish bool `yaml:"publish"`
	} `yaml:"profile"`
}

type EvalDoc

// EvalDoc describes a single eval document as mapped from YAML via struct
// tags. It is the value type of the map returned by LoadEvalDocs and is passed
// to the behavioral eval functions (AssertBehavioralTranscript,
// ExecuteBehavioralQuery, RunBehavioralEval).
//
// All keys on this type are snake_case.
type EvalDoc struct {
	// ID is read from the "id" key.
	ID              string `yaml:"id"`
	// Title is read from the "title" key.
	Title           string `yaml:"title"`
	// UserPrompt is read from the "user_prompt" key.
	UserPrompt      string `yaml:"user_prompt"`
	// EntryAgent is read from the "entry_agent" key.
	// NOTE(review): presumably the ID of the agent that receives UserPrompt — confirm.
	EntryAgent      string `yaml:"entry_agent"`
	// ExpectedRouting is read from the "expected_routing" key and carries the
	// "agent", "profile" and "fan_out" values.
	ExpectedRouting struct {
		Agent   string   `yaml:"agent"`
		Profile string   `yaml:"profile"`
		FanOut  []string `yaml:"fan_out"`
	} `yaml:"expected_routing"`
	// ExpectedPreDelegationTools is read from the "expected_pre_delegation_tools" key.
	ExpectedPreDelegationTools []EvalToolExpectation `yaml:"expected_pre_delegation_tools"`
	// ExpectedOutput is read from the "expected_output" key; only its
	// "template" value is mapped.
	ExpectedOutput             struct {
		Template string `yaml:"template"`
	} `yaml:"expected_output"`
}

type EvalToolExpectation

// EvalToolExpectation is one entry of EvalDoc.ExpectedPreDelegationTools.
// Only the tool's name is mapped.
type EvalToolExpectation struct {
	// Name is read from the "name" key.
	Name string `yaml:"name"`
}

type Options

// Options is the configuration value accepted by Run.
//
// NOTE(review): the field notes below are inferred from names and from the
// signatures of RunContractTests, CheckEvidenceContractProfiles and
// RunBehavioralEvals; how Run actually consumes each field is not visible
// here — confirm against the implementation.
type Options struct {
	// Workspace: presumably the workspace root path — confirm.
	Workspace            string
	// ContractTests: presumably forwarded to RunContractTests — confirm.
	ContractTests        []string
	// RequiredProfiles: presumably forwarded to CheckEvidenceContractProfiles — confirm.
	RequiredProfiles     []string
	// Behavioral: presumably enables the behavioral eval run — confirm.
	Behavioral           bool
	// BehavioralCases: presumably the "selected" argument of RunBehavioralEvals — confirm.
	BehavioralCases      string
	// BehavioralTimeoutSec: presumably a timeout in seconds, per the name — confirm.
	BehavioralTimeoutSec int
	// BehavioralAPI, BehavioralOOB, BehavioralToken and BehavioralAgentlyBin:
	// presumably the api, oob, token and agentlyBin arguments of
	// RunBehavioralEvals — confirm.
	BehavioralAPI        string
	BehavioralOOB        string
	BehavioralToken      string
	BehavioralAgentlyBin string
}

type PromptDoc

// PromptDoc describes the subset of a prompt document that this package maps
// from YAML via struct tags: its identifier and its evidence contract.
type PromptDoc struct {
	// ID is read from the "id" key.
	ID               string `yaml:"id"`
	// EvidenceContract is read from the "evidenceContract" key and holds four
	// string lists: "required", "optional", "forbidden" and "completion".
	// NOTE(review): what each list entry denotes is not visible here — confirm.
	EvidenceContract struct {
		Required   []string `yaml:"required"`
		Optional   []string `yaml:"optional"`
		Forbidden  []string `yaml:"forbidden"`
		Completion []string `yaml:"completion"`
	} `yaml:"evidenceContract"`
}

type StarterTaskDoc

// StarterTaskDoc is one entry of AgentDoc.StarterTasks, mapped from YAML via
// struct tags.
type StarterTaskDoc struct {
	// ID is read from the "id" key.
	ID              string   `yaml:"id"`
	// Title is read from the "title" key.
	Title           string   `yaml:"title"`
	// CoverageEvalIDs is read from the "coverageEvalIds" key.
	// NOTE(review): presumably lists EvalDoc IDs that cover this task, as
	// checked by CheckStarterTaskCoverage — confirm.
	CoverageEvalIDs []string `yaml:"coverageEvalIds"`
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL