evals

package

Version: v1.1.11 (latest) | Published: Feb 15, 2026 | License: Apache-2.0 | Imports: 15 | Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/AltairaLabs/PromptKit

Links

Open Source Insights

Documentation ¶

Overview ¶

Package evals provides the core evaluation framework for PromptPack. Eval definitions travel with packs and can run both during Arena testing and at runtime in production via the SDK.

Index ¶

Constants
Variables
func RegisterDefault(h EvalTypeHandler)
func ShouldRun(trigger EvalTrigger, samplePct float64, ctx *TriggerContext) bool
func ValidateEvals(defs []EvalDef, scope string) []string
type CompositeResultWriter
- func NewCompositeResultWriter(writers ...ResultWriter) *CompositeResultWriter
- func (w *CompositeResultWriter) WriteResults(ctx context.Context, results []EvalResult) error
type EvalContext
type EvalDef
- func ResolveEvals(packEvals, promptEvals []EvalDef) []EvalDef
- func (e *EvalDef) GetSamplePercentage() float64
- func (e *EvalDef) IsEnabled() bool
type EvalDispatcher
type EvalResult
type EvalRunner
- func NewEvalRunner(registry *EvalTypeRegistry, opts ...RunnerOption) *EvalRunner
- func (r *EvalRunner) RunSessionEvals(ctx context.Context, defs []EvalDef, evalCtx *EvalContext) []EvalResult
- func (r *EvalRunner) RunTurnEvals(ctx context.Context, defs []EvalDef, evalCtx *EvalContext) []EvalResult
type EvalTrigger
type EvalTypeHandler
type EvalTypeRegistry
- func NewEmptyEvalTypeRegistry() *EvalTypeRegistry
- func NewEvalTypeRegistry() *EvalTypeRegistry
- func (r *EvalTypeRegistry) Get(evalType string) (EvalTypeHandler, error)
- func (r *EvalTypeRegistry) Has(evalType string) bool
- func (r *EvalTypeRegistry) Register(handler EvalTypeHandler)
- func (r *EvalTypeRegistry) Types() []string
type EvalWorker
- func NewEvalWorker(runner *EvalRunner, subscriber EventSubscriber, resultWriter ResultWriter, ...) *EvalWorker
- func (w *EvalWorker) Start(ctx context.Context) error
type EventBusEvalListener
- func NewEventBusEvalListener(bus *events.EventBus, dispatcher EvalDispatcher, evalLoader PackEvalLoader, ...) *EventBusEvalListener
- func (l *EventBusEvalListener) Accumulator() *SessionAccumulator
- func (l *EventBusEvalListener) Close() error
- func (l *EventBusEvalListener) CloseSession(sessionID string)
- func (l *EventBusEvalListener) Handle(event *events.Event)
type EventBusEvalListenerOption
- func WithTTL(ttl time.Duration) EventBusEvalListenerOption
type EventDispatcher
- func NewEventDispatcher(publisher EventPublisher) *EventDispatcher
- func (d *EventDispatcher) DispatchSessionEvals(ctx context.Context, defs []EvalDef, evalCtx *EvalContext) ([]EvalResult, error)
- func (d *EventDispatcher) DispatchTurnEvals(ctx context.Context, defs []EvalDef, evalCtx *EvalContext) ([]EvalResult, error)
type EventPublisher
type EventSubscriber
type InProcDispatcher
- func NewInProcDispatcher(runner *EvalRunner, resultWriter ResultWriter) *InProcDispatcher
- func (d *InProcDispatcher) DispatchSessionEvals(ctx context.Context, defs []EvalDef, evalCtx *EvalContext) ([]EvalResult, error)
- func (d *InProcDispatcher) DispatchTurnEvals(ctx context.Context, defs []EvalDef, evalCtx *EvalContext) ([]EvalResult, error)
type Logger
type MetadataResultWriter
- func (w *MetadataResultWriter) WriteResults(_ context.Context, _ []EvalResult) error
type MetricCollector
- func NewMetricCollector(opts ...MetricCollectorOption) *MetricCollector
- func (mc *MetricCollector) Record(result EvalResult, metric *MetricDef) error
- func (mc *MetricCollector) Reset()
- func (mc *MetricCollector) WritePrometheus(w io.Writer) error
type MetricCollectorOption
- func WithBuckets(buckets []float64) MetricCollectorOption
- func WithNamespace(ns string) MetricCollectorOption
type MetricDef
- func (m MetricDef) MarshalJSON() ([]byte, error)
- func (m *MetricDef) UnmarshalJSON(data []byte) error
type MetricRecorder
type MetricResultWriter
- func NewMetricResultWriter(recorder MetricRecorder, defs []EvalDef) *MetricResultWriter
- func (w *MetricResultWriter) WriteResults(_ context.Context, results []EvalResult) error
type MetricType
type NoOpDispatcher
- func (d *NoOpDispatcher) DispatchSessionEvals(_ context.Context, _ []EvalDef, _ *EvalContext) ([]EvalResult, error)
- func (d *NoOpDispatcher) DispatchTurnEvals(_ context.Context, _ []EvalDef, _ *EvalContext) ([]EvalResult, error)
type PackEvalLoader
type Range
type ResultWriter
type RunnerOption
- func WithTimeout(d time.Duration) RunnerOption
type SessionAccumulator
- func NewSessionAccumulator() *SessionAccumulator
- func (sa *SessionAccumulator) AddMessage(sessionID, promptID, role, content string)
- func (sa *SessionAccumulator) BuildEvalContext(sessionID string) *EvalContext
- func (sa *SessionAccumulator) CleanupBefore(cutoff time.Time) int
- func (sa *SessionAccumulator) PromptID(sessionID string) string
- func (sa *SessionAccumulator) Remove(sessionID string)
type ToolCallRecord
type TriggerContext
type WorkerOption
- func WithLogger(l Logger) WorkerOption

Constants ¶

View Source

const DefaultEvalTimeout = 30 * time.Second

DefaultEvalTimeout is the per-eval execution timeout.

View Source

const DefaultSamplePercentage = 5.0

DefaultSamplePercentage is the default sampling rate when not specified.

Variables ¶

View Source

var DefaultBuckets = []float64{.005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5, 10}

DefaultBuckets are the default Prometheus histogram bucket boundaries. These match prometheus.DefBuckets.

View Source

var ValidMetricTypes = map[MetricType]bool{
	MetricGauge:     true,
	MetricCounter:   true,
	MetricHistogram: true,
	MetricBoolean:   true,
}

ValidMetricTypes is the set of valid metric type values.

View Source

var ValidTriggers = map[EvalTrigger]bool{
	TriggerEveryTurn:         true,
	TriggerOnSessionComplete: true,
	TriggerSampleTurns:       true,
	TriggerSampleSessions:    true,
}

ValidTriggers is the set of valid trigger values.

Functions ¶

func RegisterDefault ¶

func RegisterDefault(h EvalTypeHandler)

RegisterDefault adds a handler to the default set used by NewEvalTypeRegistry. Call this from handler init() functions or from handlers.RegisterDefaults().

func ShouldRun ¶

func ShouldRun(
	trigger EvalTrigger, samplePct float64, ctx *TriggerContext,
) bool

ShouldRun determines whether an eval should fire given its trigger, sampling percentage, and current context. Sampling is deterministic: the same sessionID+turnIndex always produces the same decision.

func ValidateEvals ¶

func ValidateEvals(defs []EvalDef, scope string) []string

ValidateEvals validates a slice of EvalDef for correctness. The scope parameter is used in error messages (e.g. "pack", "prompt:foo"). It checks:

- IDs are non-empty and unique within the slice
- Type is non-empty
- Trigger is a valid value
- sample_percentage (if set) is in [0, 100]
- Metric name matches the Prometheus naming regex
- Metric type is one of gauge/counter/histogram/boolean

Types ¶

type CompositeResultWriter ¶

type CompositeResultWriter struct {
	// contains filtered or unexported fields
}

CompositeResultWriter fans out WriteResults calls to multiple writers. All writers are called; the first error encountered is returned.

func NewCompositeResultWriter ¶

func NewCompositeResultWriter(writers ...ResultWriter) *CompositeResultWriter

NewCompositeResultWriter creates a writer that delegates to multiple writers. Writers are called in order.

func (*CompositeResultWriter) WriteResults ¶

func (w *CompositeResultWriter) WriteResults(
	ctx context.Context, results []EvalResult,
) error

WriteResults calls WriteResults on each writer in order. All writers are called even if one of them fails; the first error encountered is returned.

type EvalContext ¶

type EvalContext struct {
	Messages      []types.Message  `json:"messages"`
	TurnIndex     int              `json:"turn_index"`
	CurrentOutput string           `json:"current_output"`
	ToolCalls     []ToolCallRecord `json:"tool_calls,omitempty"`
	SessionID     string           `json:"session_id"`
	PromptID      string           `json:"prompt_id"`
	Variables     map[string]any   `json:"variables,omitempty"`
	Metadata      map[string]any   `json:"metadata,omitempty"`
}

EvalContext provides data to eval handlers. For turn-level evals: Messages contains history up to the current turn. For session-level evals: Messages contains the full conversation.

type EvalDef ¶

type EvalDef struct {
	ID               string         `json:"id" yaml:"id"`
	Type             string         `json:"type" yaml:"type"`
	Trigger          EvalTrigger    `json:"trigger" yaml:"trigger"`
	Params           map[string]any `json:"params" yaml:"params"`
	Description      string         `json:"description,omitempty" yaml:"description,omitempty"`
	Enabled          *bool          `json:"enabled,omitempty" yaml:"enabled,omitempty"`
	SamplePercentage *float64       `json:"sample_percentage,omitempty" yaml:"sample_percentage,omitempty"`
	Metric           *MetricDef     `json:"metric,omitempty" yaml:"metric,omitempty"`
}

EvalDef defines a single evaluation within a PromptPack. Evals are defined at pack level and/or prompt level. Prompt-level evals override pack-level evals by ID.

func ResolveEvals ¶

func ResolveEvals(packEvals, promptEvals []EvalDef) []EvalDef

ResolveEvals merges pack-level and prompt-level eval definitions. Prompt-level evals override pack-level evals when they share the same ID. The returned slice preserves pack ordering first, followed by any prompt-only evals (those with no pack counterpart) in their original order.

func (*EvalDef) GetSamplePercentage ¶

func (e *EvalDef) GetSamplePercentage() float64

GetSamplePercentage returns the sampling percentage. Defaults to DefaultSamplePercentage when SamplePercentage is nil.

func (*EvalDef) IsEnabled ¶

func (e *EvalDef) IsEnabled() bool

IsEnabled returns whether this eval is enabled. Defaults to true when Enabled is nil.

type EvalDispatcher ¶

type EvalDispatcher interface {
	// DispatchTurnEvals dispatches turn-level evals.
	// Returns results synchronously (InProc) or nil (Event/NoOp).
	DispatchTurnEvals(
		ctx context.Context, defs []EvalDef, evalCtx *EvalContext,
	) ([]EvalResult, error)

	// DispatchSessionEvals dispatches session-level evals.
	// Returns results synchronously (InProc) or nil (Event/NoOp).
	DispatchSessionEvals(
		ctx context.Context, defs []EvalDef, evalCtx *EvalContext,
	) ([]EvalResult, error)
}

EvalDispatcher controls WHERE evals execute. Implementations decide whether evals run in-process, are published to an event bus for async processing, or are skipped entirely.

type EvalResult ¶

type EvalResult struct {
	EvalID      string   `json:"eval_id"`
	Type        string   `json:"type"`
	Passed      bool     `json:"passed"`
	Score       *float64 `json:"score,omitempty"`
	MetricValue *float64 `json:"metric_value,omitempty"`
	Explanation string   `json:"explanation,omitempty"`
	DurationMs  int64    `json:"duration_ms"`
	Error       string   `json:"error,omitempty"`
}

EvalResult captures the outcome of a single eval execution.

type EvalRunner ¶

type EvalRunner struct {
	// contains filtered or unexported fields
}

EvalRunner executes evals in-process. It is the leaf execution unit used by all dispatch modes (in-proc, event-driven, worker).

func NewEvalRunner ¶

func NewEvalRunner(
	registry *EvalTypeRegistry, opts ...RunnerOption,
) *EvalRunner

NewEvalRunner creates an EvalRunner with the given registry and options.

func (*EvalRunner) RunSessionEvals ¶

func (r *EvalRunner) RunSessionEvals(
	ctx context.Context,
	defs []EvalDef,
	evalCtx *EvalContext,
) []EvalResult

RunSessionEvals runs session-level evals (on_session_complete and sample_sessions triggers). Call this when a session ends.

func (*EvalRunner) RunTurnEvals ¶

func (r *EvalRunner) RunTurnEvals(
	ctx context.Context,
	defs []EvalDef,
	evalCtx *EvalContext,
) []EvalResult

RunTurnEvals runs turn-level evals (every_turn and sample_turns triggers). It filters by enabled state and trigger, then executes matching handlers.

type EvalTrigger ¶

type EvalTrigger string

EvalTrigger determines when an eval fires.

const (
	// TriggerEveryTurn fires the eval after every assistant turn.
	TriggerEveryTurn EvalTrigger = "every_turn"
	// TriggerOnSessionComplete fires the eval when a session ends.
	TriggerOnSessionComplete EvalTrigger = "on_session_complete"
	// TriggerSampleTurns fires the eval on a percentage of turns (hash-based).
	TriggerSampleTurns EvalTrigger = "sample_turns"
	// TriggerSampleSessions fires the eval on a percentage of sessions (hash-based).
	TriggerSampleSessions EvalTrigger = "sample_sessions"
)

type EvalTypeHandler ¶

type EvalTypeHandler interface {
	// Type returns the eval type identifier (e.g. "contains", "regex").
	Type() string

	// Eval executes the evaluation and returns a result.
	// The EvalContext carries messages, tool calls, and metadata.
	// Params come from the EvalDef.Params map.
	Eval(ctx context.Context, evalCtx *EvalContext, params map[string]any) (*EvalResult, error)
}

EvalTypeHandler defines the interface for eval type implementations. Each handler covers a single eval type (e.g. "contains", "llm_judge"). Handlers are stateless — params are passed per invocation.

type EvalTypeRegistry ¶

type EvalTypeRegistry struct {
	// contains filtered or unexported fields
}

EvalTypeRegistry provides thread-safe registration and lookup of EvalTypeHandler implementations by type name.

func NewEmptyEvalTypeRegistry ¶

func NewEmptyEvalTypeRegistry() *EvalTypeRegistry

NewEmptyEvalTypeRegistry creates a registry with no handlers registered. Use this in tests to control exactly which handlers are available.

func NewEvalTypeRegistry ¶

func NewEvalTypeRegistry() *EvalTypeRegistry

NewEvalTypeRegistry creates a registry pre-populated with all built-in eval handlers. Call this in production code. Handlers self-register via RegisterDefaults in the handlers package; import _ "github.com/AltairaLabs/PromptKit/runtime/evals/handlers" or call handlers.RegisterDefaults(r) explicitly.

func (*EvalTypeRegistry) Get ¶

func (r *EvalTypeRegistry) Get(evalType string) (EvalTypeHandler, error)

Get returns the handler for the given type, or an error if not found.

func (*EvalTypeRegistry) Has ¶

func (r *EvalTypeRegistry) Has(evalType string) bool

Has returns true if a handler is registered for the given type.

func (*EvalTypeRegistry) Register ¶

func (r *EvalTypeRegistry) Register(handler EvalTypeHandler)

Register adds a handler to the registry. If a handler with the same type is already registered, it is replaced.

func (*EvalTypeRegistry) Types ¶

func (r *EvalTypeRegistry) Types() []string

Types returns a sorted list of all registered eval type names.

type EvalWorker ¶

type EvalWorker struct {
	// contains filtered or unexported fields
}

EvalWorker is a reusable worker loop for Pattern B event-driven eval execution. It subscribes to eval events via EventSubscriber, deserializes payloads, calls EvalRunner, and writes results via ResultWriter. Platforms wire this with their own EventSubscriber and ResultWriter implementations.

func NewEvalWorker ¶

func NewEvalWorker(
	runner *EvalRunner,
	subscriber EventSubscriber,
	resultWriter ResultWriter,
	opts ...WorkerOption,
) *EvalWorker

NewEvalWorker creates a worker that processes eval events.

func (*EvalWorker) Start ¶

func (w *EvalWorker) Start(ctx context.Context) error

Start subscribes to turn and session eval events and processes them. It blocks until the context is canceled or a subscription error occurs.

type EventBusEvalListener ¶

type EventBusEvalListener struct {
	// contains filtered or unexported fields
}

EventBusEvalListener subscribes to EventBus message events and triggers evals automatically (Pattern C). It accumulates messages per session, dispatches turn evals on assistant messages, and dispatches session evals on close.

func NewEventBusEvalListener ¶

func NewEventBusEvalListener(
	bus *events.EventBus,
	dispatcher EvalDispatcher,
	evalLoader PackEvalLoader,
	resultWriter ResultWriter,
	opts ...EventBusEvalListenerOption,
) *EventBusEvalListener

NewEventBusEvalListener creates a listener that subscribes to the bus for EventMessageCreated events and runs evals automatically.

func (*EventBusEvalListener) Accumulator ¶

func (l *EventBusEvalListener) Accumulator() *SessionAccumulator

Accumulator returns the session accumulator for external seeding. Use this to set prompt IDs on sessions before messages arrive.

func (*EventBusEvalListener) Close ¶

func (l *EventBusEvalListener) Close() error

Close stops the cleanup goroutine.

func (*EventBusEvalListener) CloseSession ¶

func (l *EventBusEvalListener) CloseSession(sessionID string)

CloseSession runs session-complete evals and removes the session.

func (*EventBusEvalListener) Handle ¶

func (l *EventBusEvalListener) Handle(event *events.Event)

Handle is the events.Listener callback for EventMessageCreated events.

type EventBusEvalListenerOption ¶

type EventBusEvalListenerOption func(*EventBusEvalListener)

EventBusEvalListenerOption configures an EventBusEvalListener.

func WithTTL ¶

func WithTTL(ttl time.Duration) EventBusEvalListenerOption

WithTTL sets the session TTL for the listener.

type EventDispatcher ¶

type EventDispatcher struct {
	// contains filtered or unexported fields
}

EventDispatcher publishes eval requests to an event bus for async processing by an EvalWorker (Pattern B). Returns nil results since evals run asynchronously in the worker.

func NewEventDispatcher ¶

func NewEventDispatcher(publisher EventPublisher) *EventDispatcher

NewEventDispatcher creates a dispatcher that publishes to an event bus.

func (*EventDispatcher) DispatchSessionEvals ¶

func (d *EventDispatcher) DispatchSessionEvals(
	ctx context.Context, defs []EvalDef, evalCtx *EvalContext,
) ([]EvalResult, error)

DispatchSessionEvals publishes a session eval request to the event bus. Subject: eval.session.{session_id}

func (*EventDispatcher) DispatchTurnEvals ¶

func (d *EventDispatcher) DispatchTurnEvals(
	ctx context.Context, defs []EvalDef, evalCtx *EvalContext,
) ([]EvalResult, error)

DispatchTurnEvals publishes a turn eval request to the event bus. Subject: eval.turn.{session_id}

type EventPublisher ¶

type EventPublisher interface {
	Publish(ctx context.Context, subject string, data []byte) error
}

EventPublisher publishes serialized eval payloads to an event bus. PromptKit ships this interface only — platforms provide concrete implementations backed by Redis Streams, NATS, Kafka, etc.

type EventSubscriber ¶

type EventSubscriber interface {
	Subscribe(
		ctx context.Context,
		subject string,
		handler func(event []byte) error,
	) error
}

EventSubscriber subscribes to eval events from an event bus. PromptKit ships this interface only — platforms provide concrete implementations backed by Redis Streams, NATS, Kafka, etc.

type InProcDispatcher ¶

type InProcDispatcher struct {
	// contains filtered or unexported fields
}

InProcDispatcher runs evals directly via EvalRunner and writes results via ResultWriter. Used by Arena (always) and SDK simple deployments. Results are returned synchronously.

func NewInProcDispatcher ¶

func NewInProcDispatcher(
	runner *EvalRunner, resultWriter ResultWriter,
) *InProcDispatcher

NewInProcDispatcher creates a dispatcher that runs evals in-process. The resultWriter may be nil if no result writing is needed.

func (*InProcDispatcher) DispatchSessionEvals ¶

func (d *InProcDispatcher) DispatchSessionEvals(
	ctx context.Context, defs []EvalDef, evalCtx *EvalContext,
) ([]EvalResult, error)

DispatchSessionEvals runs session-level evals in-process.

func (*InProcDispatcher) DispatchTurnEvals ¶

func (d *InProcDispatcher) DispatchTurnEvals(
	ctx context.Context, defs []EvalDef, evalCtx *EvalContext,
) ([]EvalResult, error)

DispatchTurnEvals runs turn-level evals in-process.

type Logger ¶

type Logger interface {
	Printf(format string, v ...any)
}

Logger is a minimal logging interface for EvalWorker.

type MetadataResultWriter ¶

type MetadataResultWriter struct{}

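MetadataResultWriter is the ResultWriter used by Arena and the SDK when eval results are attached to message metadata under the key "pack_evals" for reporting. Its WriteResults method is a no-op: callers take the results returned by InProcDispatcher and attach them to the message metadata themselves.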

func (*MetadataResultWriter) WriteResults ¶

func (w *MetadataResultWriter) WriteResults(
	_ context.Context, _ []EvalResult,
) error

WriteResults is a no-op placeholder. The actual metadata attachment happens at the caller level since the writer doesn't have access to the message being constructed. Callers use the returned results from InProcDispatcher to populate msg.Meta["pack_evals"].

type MetricCollector ¶

type MetricCollector struct {
	// contains filtered or unexported fields
}

MetricCollector implements MetricRecorder and provides Prometheus text exposition. It is safe for concurrent use.

func NewMetricCollector ¶

func NewMetricCollector(opts ...MetricCollectorOption) *MetricCollector

NewMetricCollector creates a new MetricCollector with the given options.

func (*MetricCollector) Record ¶

func (mc *MetricCollector) Record(result EvalResult, metric *MetricDef) error

Record records an eval result for the given metric definition. Thread-safe.

func (*MetricCollector) Reset ¶

func (mc *MetricCollector) Reset()

Reset clears all metrics. Primarily for testing.

func (*MetricCollector) WritePrometheus ¶

func (mc *MetricCollector) WritePrometheus(w io.Writer) error

WritePrometheus writes all metrics in Prometheus text exposition format.

type MetricCollectorOption ¶

type MetricCollectorOption func(*MetricCollector)

MetricCollectorOption configures a MetricCollector.

func WithBuckets ¶

func WithBuckets(buckets []float64) MetricCollectorOption

WithBuckets sets custom histogram bucket boundaries.

func WithNamespace ¶

func WithNamespace(ns string) MetricCollectorOption

WithNamespace sets the metric name prefix (e.g. "promptpack").

type MetricDef ¶

type MetricDef struct {
	Name  string     `json:"name" yaml:"name"`
	Type  MetricType `json:"type" yaml:"type"`
	Range *Range     `json:"range,omitempty" yaml:"range,omitempty"`

	// Extra holds additional properties beyond the defined schema fields.
	// This supports the RFC's additionalProperties: true on metric.
	Extra map[string]any `json:"-" yaml:"-"`
}

MetricDef defines a Prometheus-style metric associated with an eval. The Extra field captures additionalProperties from the schema.

func (MetricDef) MarshalJSON ¶

func (m MetricDef) MarshalJSON() ([]byte, error)

MarshalJSON implements custom JSON marshaling to include Extra fields as top-level properties alongside the known fields.

func (*MetricDef) UnmarshalJSON ¶

func (m *MetricDef) UnmarshalJSON(data []byte) error

UnmarshalJSON implements custom JSON unmarshaling to capture additional properties into the Extra field.

type MetricRecorder ¶

type MetricRecorder interface {
	Record(result EvalResult, metric *MetricDef) error
}

MetricRecorder records eval results as metrics. This interface is implemented by MetricCollector and is injected into consumers such as MetricResultWriter (via NewMetricResultWriter) to avoid circular dependencies.

type MetricResultWriter ¶

type MetricResultWriter struct {
	// contains filtered or unexported fields
}

MetricResultWriter feeds eval results to a MetricRecorder for Prometheus exposition. Only results whose corresponding EvalDef has a Metric definition are recorded.

func NewMetricResultWriter ¶

func NewMetricResultWriter(
	recorder MetricRecorder, defs []EvalDef,
) *MetricResultWriter

NewMetricResultWriter creates a writer that records metrics. The defs slice provides the metric definitions keyed by eval ID.

func (*MetricResultWriter) WriteResults ¶

func (w *MetricResultWriter) WriteResults(
	_ context.Context, results []EvalResult,
) error

WriteResults records each result that has an associated metric.

type MetricType ¶

type MetricType string

MetricType defines the Prometheus metric type for eval results.

const (
	// MetricGauge represents a gauge metric (set to a value).
	MetricGauge MetricType = "gauge"
	// MetricCounter represents a counter metric (increment only).
	MetricCounter MetricType = "counter"
	// MetricHistogram represents a histogram metric (observe values).
	MetricHistogram MetricType = "histogram"
	// MetricBoolean represents a boolean metric (0 or 1).
	MetricBoolean MetricType = "boolean"
)

type NoOpDispatcher ¶

type NoOpDispatcher struct{}

NoOpDispatcher is used when evals are disabled at the SDK dispatch level. Returns nil results with no error. Used when the platform handles evals externally (Pattern A) or via EventBusEvalListener (Pattern C).

func (*NoOpDispatcher) DispatchSessionEvals ¶

func (d *NoOpDispatcher) DispatchSessionEvals(
	_ context.Context, _ []EvalDef, _ *EvalContext,
) ([]EvalResult, error)

DispatchSessionEvals is a no-op that returns nil results.

func (*NoOpDispatcher) DispatchTurnEvals ¶

func (d *NoOpDispatcher) DispatchTurnEvals(
	_ context.Context, _ []EvalDef, _ *EvalContext,
) ([]EvalResult, error)

DispatchTurnEvals is a no-op that returns nil results.

type PackEvalLoader ¶

type PackEvalLoader interface {
	LoadEvals(promptID string) ([]EvalDef, error)
}

PackEvalLoader resolves eval definitions for a prompt. Implementations are provided by SDK/platform.

type Range ¶

type Range struct {
	Min *float64 `json:"min,omitempty" yaml:"min,omitempty"`
	Max *float64 `json:"max,omitempty" yaml:"max,omitempty"`
}

Range defines the valid range for a metric value.

type ResultWriter ¶

type ResultWriter interface {
	WriteResults(ctx context.Context, results []EvalResult) error
}

ResultWriter controls WHERE eval results go. Implementations may write to Prometheus metrics, message metadata, telemetry spans, databases, or external APIs. Platform-specific writers are implemented outside PromptKit.

type RunnerOption ¶

type RunnerOption func(*EvalRunner)

RunnerOption configures an EvalRunner.

func WithTimeout ¶

func WithTimeout(d time.Duration) RunnerOption

WithTimeout sets the per-eval execution timeout.

type SessionAccumulator ¶

type SessionAccumulator struct {
	// contains filtered or unexported fields
}

SessionAccumulator accumulates messages per session for eval context building.

func NewSessionAccumulator ¶

func NewSessionAccumulator() *SessionAccumulator

NewSessionAccumulator creates a new SessionAccumulator.

func (*SessionAccumulator) AddMessage ¶

func (sa *SessionAccumulator) AddMessage(sessionID, promptID, role, content string)

AddMessage adds a message to a session's accumulator.

func (*SessionAccumulator) BuildEvalContext ¶

func (sa *SessionAccumulator) BuildEvalContext(sessionID string) *EvalContext

BuildEvalContext builds an EvalContext from the accumulated session state.

func (*SessionAccumulator) CleanupBefore ¶

func (sa *SessionAccumulator) CleanupBefore(cutoff time.Time) int

CleanupBefore removes sessions with lastSeen before the cutoff. Returns the number of sessions removed.

func (*SessionAccumulator) PromptID ¶

func (sa *SessionAccumulator) PromptID(sessionID string) string

PromptID returns the prompt ID for a session, or empty string if not found.

func (*SessionAccumulator) Remove ¶

func (sa *SessionAccumulator) Remove(sessionID string)

Remove removes a session from the accumulator.

type ToolCallRecord ¶

type ToolCallRecord struct {
	TurnIndex int            `json:"turn_index"`
	ToolName  string         `json:"tool_name"`
	Arguments map[string]any `json:"arguments"`
	Result    any            `json:"result,omitempty"`
	Error     string         `json:"error,omitempty"`
	Duration  time.Duration  `json:"duration,omitempty"`
}

ToolCallRecord captures a single tool invocation for eval context.

type TriggerContext ¶

type TriggerContext struct {
	// SessionID identifies the current session (used for sampling).
	SessionID string

	// TurnIndex is the current turn number (used for sampling).
	TurnIndex int

	// IsSessionComplete indicates whether the session has ended.
	IsSessionComplete bool
}

TriggerContext provides context for trigger evaluation decisions.

type WorkerOption ¶

type WorkerOption func(*EvalWorker)

WorkerOption configures an EvalWorker.

func WithLogger ¶

func WithLogger(l Logger) WorkerOption

WithLogger sets a custom logger for the worker.

Source Files ¶

View all Source files

Directories ¶

Path	Synopsis
handlers	Package handlers provides eval type handler implementations.

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL