Documentation
¶
Overview ¶
Package audit provides types and utilities for extracting evaluation data from traces.
Index ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func GenerateMermaid ¶
func GenerateMermaid(obj *TraceObject) string
GenerateMermaid creates a Mermaid flowchart from a TraceObject
func GenerateMermaidWithHierarchy ¶
func GenerateMermaidWithHierarchy(obj *TraceObject) string
GenerateMermaidWithHierarchy creates a Mermaid diagram respecting parent-child relationships
Types ¶
type Collector ¶
// Collector extracts trace events from stored span data.
// Its fields are not exported; per this page, instances are created with
// NewCollector (from a run path) and turned into a TraceObject via Collect.
type Collector struct {
// contains filtered or unexported fields
}
Collector extracts trace events from stored span data
func NewCollector ¶
NewCollector creates a collector from a run path
func (*Collector) Collect ¶
func (c *Collector) Collect() (*TraceObject, error)
Collect extracts TraceObject from the spans
func (*Collector) GetReasoningPath ¶
func (c *Collector) GetReasoningPath(obj *TraceObject) []EventType
GetReasoningPath extracts the sequence of event types
type EventType ¶
// EventType categorizes trace events for evaluation.
// It is a string type, so values serialize as plain JSON strings
// (for example in TraceEvent's "type" field).
type EventType string
EventType categorizes trace events for evaluation
const ( // EventTypeThought represents internal reasoning/decision EventTypeThought EventType = "thought" // EventTypeToolCall represents a tool invocation EventTypeToolCall EventType = "tool_call" // EventTypeObservation represents tool output/result EventTypeObservation EventType = "observation" // EventTypeLLMCall represents an LLM API call EventTypeLLMCall EventType = "llm_call" // EventTypeDecision represents a decision point EventTypeDecision EventType = "decision" )
type RawSpan ¶
// RawSpan represents a parsed span from trace.jsonl.
//
// The JSON keys are capitalized and identical to the Go field names, unlike
// the snake_case keys used by the evaluation types in this package.
type RawSpan struct {
	// Name is the span name.
	Name string `json:"Name"`
	// SpanContext identifies this span.
	SpanContext SpanContext `json:"SpanContext"`
	// Parent is the span context of the parent span.
	// NOTE(review): how a root span's parent is encoded is not visible here — confirm.
	Parent SpanContext `json:"Parent"`
	// StartTime is kept as the raw string from the file; it is not parsed
	// into a time.Time by this type.
	StartTime string `json:"StartTime"`
	// EndTime is kept as the raw string from the file, like StartTime.
	EndTime string `json:"EndTime"`
	// Attributes is a list of loosely-typed attribute objects.
	// NOTE(review): the per-entry schema (e.g. Key/Value pairs) is not
	// visible here — confirm against the trace writer.
	Attributes []map[string]any `json:"Attributes"`
	// Status is the span's status code and optional description.
	Status SpanStatus `json:"Status"`
}
RawSpan represents a parsed span from trace.jsonl
type ReasoningAnalysis ¶
// ReasoningAnalysis provides evaluation-focused analysis of the trace.
// Only Path is always serialized; the other fields are tagged omitempty, so
// empty or nil values are left out of the JSON.
type ReasoningAnalysis struct {
// Path shows the sequence of event types taken
Path []EventType `json:"path"`
// DecisionPoints where agent made choices
DecisionPoints []TraceEvent `json:"decision_points,omitempty"`
// ToolUsageCorrect indicates if tool calls were appropriate.
// It is a pointer, so nil is distinguishable from false
// (presumably nil means "not evaluated" — confirm with the producer).
ToolUsageCorrect *bool `json:"tool_usage_correct,omitempty"`
// ReasoningQuality is a 0-1 score for reasoning quality (set by judge).
// It is a pointer, so an unset score (nil) is distinguishable from 0.
ReasoningQuality *float64 `json:"reasoning_quality,omitempty"`
}
ReasoningAnalysis provides evaluation-focused analysis of the trace
type SpanContext ¶
SpanContext contains span identification
type SpanStatus ¶
// SpanStatus contains span status.
type SpanStatus struct {
// Code is the status code, carried as a string.
// NOTE(review): the set of valid code values is not visible here — confirm.
Code string `json:"Code"`
// Description is optional text accompanying the code; omitted from JSON when empty.
Description string `json:"Description,omitempty"`
}
SpanStatus contains span status
type TraceEvent ¶
// TraceEvent represents a single step in agent execution.
// Timestamp, Type, SpanID and SpanName are always serialized; the remaining
// fields are tagged omitempty and are left out of the JSON when empty or zero.
type TraceEvent struct {
// Timestamp is the time associated with the event.
// NOTE(review): presumably the span start time — confirm against Collect.
Timestamp time.Time `json:"timestamp"`
// Type is the event category (one of the EventType values).
Type EventType `json:"type"`
// SpanID identifies the span this event belongs to.
SpanID string `json:"span_id"`
// SpanName is the name of that span.
SpanName string `json:"span_name"`
Content string `json:"content,omitempty"` // Main content (prompt, response, etc)
Metadata map[string]any `json:"metadata,omitempty"` // Additional context
// Because of omitempty, a duration of 0 is omitted from the JSON output.
DurationMs int64 `json:"duration_ms,omitempty"` // Duration in milliseconds
ParentID string `json:"parent_id,omitempty"` // Parent span for hierarchy
}
TraceEvent represents a single step in agent execution
type TraceObject ¶
// TraceObject is the complete trace for evaluation.
// It bundles run identification, the run's time bounds, the individual
// events, and an aggregate summary into one JSON-serializable value.
type TraceObject struct {
// RunID identifies the run this trace belongs to.
RunID string `json:"run_id"`
// Command is optional; omitted from JSON when empty.
// NOTE(review): presumably the command that was executed for the run — confirm.
Command string `json:"command,omitempty"`
// StartTime and EndTime bound the trace in time.
StartTime time.Time `json:"start_time"`
EndTime time.Time `json:"end_time"`
// Events holds the individual steps of the trace.
// NOTE(review): ordering is not visible here; presumably chronological — confirm.
Events []TraceEvent `json:"events"`
// FinalOutput is optional; omitted from JSON when empty.
FinalOutput string `json:"final_output,omitempty"`
// Summary carries aggregate metrics for the trace.
Summary TraceSummary `json:"summary"`
}
TraceObject is the complete trace for evaluation
type TraceSummary ¶
// TraceSummary provides aggregate metrics for the trace.
type TraceSummary struct {
// TotalEvents is the total number of events.
TotalEvents int `json:"total_events"`
// ThoughtCount, ToolCallCount and LLMCallCount are per-category event counts
// (presumably matching the corresponding EventType values — confirm).
ThoughtCount int `json:"thought_count"`
ToolCallCount int `json:"tool_call_count"`
LLMCallCount int `json:"llm_call_count"`
// TotalDurationMs is the total duration in milliseconds.
TotalDurationMs int64 `json:"total_duration_ms"`
// TokensUsed is omitted from JSON when zero.
TokensUsed int `json:"tokens_used,omitempty"`
// EstimatedCost is omitted from JSON when zero.
// NOTE(review): the currency/unit is not visible here — confirm.
EstimatedCost float64 `json:"estimated_cost,omitempty"`
HasDetailedData bool `json:"has_detailed_data"` // True if content captured
}
TraceSummary provides aggregate metrics for the trace