Documentation ¶
Overview ¶
Package agent provides a synthetic user agent for validating contextd, in particular for testing contextd's Bayesian confidence system. It simulates realistic user interactions including memory recording, searching, feedback, and outcome reporting.
Index ¶
- func AnalyzeConversations(stats []*ConversationStats) map[string]interface{}
- type Action
- type Agent
- func (a *Agent) GenerateFeedbackDecision(memory MemoryResult) bool
- func (a *Agent) GenerateOutcome() bool
- func (a *Agent) GetConfidenceHistory(memoryID string) []float64
- func (a *Agent) GetSession() *Session
- func (a *Agent) GiveFeedback(ctx context.Context, memoryID string, helpful bool, reasoning string) (float64, error)
- func (a *Agent) RecordMemory(ctx context.Context, title, content, outcome string, tags []string) (string, error)
- func (a *Agent) ReportOutcome(ctx context.Context, memoryID string, succeeded bool, taskDesc string) (float64, error)
- func (a *Agent) SearchMemories(ctx context.Context, query string, limit int) ([]MemoryResult, error)
- func (a *Agent) ShouldGiveFeedback() bool
- type AssertResult
- type Assertion
- type Config
- type ContentBlock
- type ContextdClient
- type ContextdToolCall
- type ConversationEntry
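- type ConversationStats
- func ParseConversation(path string) (*ConversationStats, error)
- func ParseConversationsDir(dir string) ([]*ConversationStats, error)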
- type FeedbackEvent
- type LLMClient
- type MemoryResult
- type MessageContent
- type MockContextdClient
- func NewMockContextdClient() *MockContextdClient
- func (m *MockContextdClient) GetMemory(ctx context.Context, memoryID string) (*MemoryResult, error)
- func (m *MockContextdClient) GetSignalCount(memoryID string) int
- func (m *MockContextdClient) MemoryFeedback(ctx context.Context, memoryID string, helpful bool) (float64, error)
- func (m *MockContextdClient) MemoryOutcome(ctx context.Context, memoryID string, succeeded bool, sessionID string) (float64, error)
- func (m *MockContextdClient) MemoryRecord(ctx context.Context, projectID, title, content, outcome string, tags []string) (string, float64, error)
- func (m *MockContextdClient) MemorySearch(ctx context.Context, projectID, query string, limit int) ([]MemoryResult, error)
- func (m *MockContextdClient) Reset()
- type OutcomeEvent
- type Persona
- type Runner
- func NewRunner(cfg RunnerConfig) (*Runner, error)
- type RunnerConfig
- type Scenario
- func GenerateScenarioFromStats(stats *ConversationStats) *Scenario
- type Session
- type SessionMetrics
- type TestResult
- type ToolCall
- type ToolUseBlock
- type Turn
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func AnalyzeConversations ¶
func AnalyzeConversations(stats []*ConversationStats) map[string]interface{}
AnalyzeConversations provides aggregate statistics across conversations.
Types ¶
type Action ¶
type Action struct {
Type string `json:"type"` // "record", "search", "feedback", "outcome"
Args map[string]interface{} `json:"args"`
}
Action represents a specific action in a scenario.
type Agent ¶
type Agent struct {
// contains filtered or unexported fields
}
Agent is a synthetic user agent for testing contextd.
func (*Agent) GenerateFeedbackDecision ¶
func (a *Agent) GenerateFeedbackDecision(memory MemoryResult) bool
GenerateFeedbackDecision decides if a memory was helpful based on persona.
func (*Agent) GenerateOutcome ¶
func (a *Agent) GenerateOutcome() bool
GenerateOutcome decides if a task succeeded based on persona's success rate.
func (*Agent) GetConfidenceHistory ¶
func (a *Agent) GetConfidenceHistory(memoryID string) []float64
GetConfidenceHistory returns confidence tracking for a memory.
func (*Agent) GetSession ¶
func (a *Agent) GetSession() *Session
GetSession returns the current session state.
func (*Agent) GiveFeedback ¶
func (a *Agent) GiveFeedback(ctx context.Context, memoryID string, helpful bool, reasoning string) (float64, error)
GiveFeedback provides feedback on a memory.
func (*Agent) RecordMemory ¶
func (a *Agent) RecordMemory(ctx context.Context, title, content, outcome string, tags []string) (string, error)
RecordMemory records a new memory and tracks confidence.
func (*Agent) ReportOutcome ¶
func (a *Agent) ReportOutcome(ctx context.Context, memoryID string, succeeded bool, taskDesc string) (float64, error)
ReportOutcome reports a task outcome for a memory.
func (*Agent) SearchMemories ¶
func (a *Agent) SearchMemories(ctx context.Context, query string, limit int) ([]MemoryResult, error)
SearchMemories searches for relevant memories.
func (*Agent) ShouldGiveFeedback ¶
func (a *Agent) ShouldGiveFeedback() bool
ShouldGiveFeedback decides if feedback should be given based on persona.
type AssertResult ¶
type AssertResult struct {
Assertion Assertion `json:"assertion"`
Passed bool `json:"passed"`
Actual interface{} `json:"actual,omitempty"`
Message string `json:"message,omitempty"`
}
AssertResult captures individual assertion outcomes.
type Assertion ¶
type Assertion struct {
// Type of assertion
// Options: "confidence_increased", "confidence_decreased",
// "confidence_above", "confidence_below",
// "memory_count", "weight_shifted"
Type string `json:"type"`
// Target identifies what to check (e.g., memory_id)
Target string `json:"target,omitempty"`
// Value for comparison
Value interface{} `json:"value,omitempty"`
// Message to show on failure
Message string `json:"message,omitempty"`
}
Assertion defines an expected outcome.
type Config ¶
type Config struct {
Client ContextdClient
LLM LLMClient
Persona Persona
ProjectID string
Logger *zap.Logger
}
Config configures an Agent.
type ContentBlock ¶
type ContentBlock struct {
Type string `json:"type"`
Text string `json:"text,omitempty"`
ToolUse *ToolUseBlock `json:"tool_use,omitempty"`
ID string `json:"id,omitempty"`
Name string `json:"name,omitempty"`
Input json.RawMessage `json:"input,omitempty"`
}
ContentBlock represents a content block in assistant messages.
type ContextdClient ¶
type ContextdClient interface {
// Memory operations
MemoryRecord(ctx context.Context, projectID, title, content, outcome string, tags []string) (string, float64, error)
MemorySearch(ctx context.Context, projectID, query string, limit int) ([]MemoryResult, error)
MemoryFeedback(ctx context.Context, memoryID string, helpful bool) (float64, error)
MemoryOutcome(ctx context.Context, memoryID string, succeeded bool, sessionID string) (float64, error)
// For observing state
GetMemory(ctx context.Context, memoryID string) (*MemoryResult, error)
}
ContextdClient defines the interface for interacting with contextd. This allows mocking for unit tests.
type ContextdToolCall ¶
type ContextdToolCall struct {
Timestamp time.Time
SessionID string
Tool string // memory_search, memory_record, memory_feedback, etc.
Input map[string]interface{}
Success bool
Output string
}
ContextdToolCall represents a contextd MCP tool invocation.
type ConversationEntry ¶
type ConversationEntry struct {
Type string `json:"type"`
UUID string `json:"uuid"`
ParentUUID *string `json:"parentUuid"`
SessionID string `json:"sessionId"`
Timestamp time.Time `json:"timestamp"`
Message json.RawMessage `json:"message"`
UserType string `json:"userType"`
GitBranch string `json:"gitBranch"`
CWD string `json:"cwd"`
Summary string `json:"summary,omitempty"`
ToolResult json.RawMessage `json:"toolUseResult,omitempty"`
}
ConversationEntry represents a single entry in a Claude Code JSONL export.
type ConversationStats ¶
type ConversationStats struct {
SessionID string
StartTime time.Time
EndTime time.Time
Duration time.Duration
UserMessages int
AssistantMessages int
ToolCalls int
ContextdToolCalls []ContextdToolCall
MemorySearches int
MemoryRecords int
MemoryFeedbacks int
CheckpointSaves int
CheckpointResumes int
Errors int
}
ConversationStats holds statistics about a conversation.
func ParseConversation ¶
func ParseConversation(path string) (*ConversationStats, error)
ParseConversation parses a JSONL conversation file.
func ParseConversationsDir ¶
func ParseConversationsDir(dir string) ([]*ConversationStats, error)
ParseConversationsDir parses all JSONL files in a directory.
type FeedbackEvent ¶
type FeedbackEvent struct {
Timestamp time.Time `json:"timestamp"`
MemoryID string `json:"memory_id"`
Helpful bool `json:"helpful"`
Reasoning string `json:"reasoning,omitempty"`
}
FeedbackEvent records when feedback was given.
type LLMClient ¶
type LLMClient interface {
Generate(ctx context.Context, systemPrompt, userPrompt string) (string, error)
GenerateStructured(ctx context.Context, systemPrompt, userPrompt string, schema interface{}) (interface{}, error)
}
LLMClient defines the interface for LLM interactions. Allows swapping Claude for other models or mocks.
type MemoryResult ¶
type MemoryResult struct {
ID string `json:"id"`
Title string `json:"title"`
Content string `json:"content"`
Outcome string `json:"outcome"`
Confidence float64 `json:"confidence"`
Tags []string `json:"tags"`
}
MemoryResult represents a memory returned from contextd.
type MessageContent ¶
type MessageContent struct {
Role string `json:"role"`
Content interface{} `json:"content"` // Can be string or []ContentBlock
Model string `json:"model,omitempty"`
}
MessageContent represents the content of a message.
type MockContextdClient ¶
type MockContextdClient struct {
// contains filtered or unexported fields
}
MockContextdClient is a mock implementation of ContextdClient for testing. It simulates the Bayesian confidence system behavior.
func NewMockContextdClient ¶
func NewMockContextdClient() *MockContextdClient
NewMockContextdClient creates a new mock client.
func (*MockContextdClient) GetMemory ¶
func (m *MockContextdClient) GetMemory(ctx context.Context, memoryID string) (*MemoryResult, error)
GetMemory retrieves a memory by ID.
func (*MockContextdClient) GetSignalCount ¶
func (m *MockContextdClient) GetSignalCount(memoryID string) int
GetSignalCount returns the number of signals for a memory.
func (*MockContextdClient) MemoryFeedback ¶
func (m *MockContextdClient) MemoryFeedback(ctx context.Context, memoryID string, helpful bool) (float64, error)
MemoryFeedback provides feedback on a memory.
func (*MockContextdClient) MemoryOutcome ¶
func (m *MockContextdClient) MemoryOutcome(ctx context.Context, memoryID string, succeeded bool, sessionID string) (float64, error)
MemoryOutcome reports a task outcome.
func (*MockContextdClient) MemoryRecord ¶
func (m *MockContextdClient) MemoryRecord(ctx context.Context, projectID, title, content, outcome string, tags []string) (string, float64, error)
MemoryRecord creates a new memory.
func (*MockContextdClient) MemorySearch ¶
func (m *MockContextdClient) MemorySearch(ctx context.Context, projectID, query string, limit int) ([]MemoryResult, error)
MemorySearch returns memories matching the query.
func (*MockContextdClient) Reset ¶
func (m *MockContextdClient) Reset()
Reset clears all state (useful between tests).
type OutcomeEvent ¶
type OutcomeEvent struct {
Timestamp time.Time `json:"timestamp"`
MemoryID string `json:"memory_id"`
Succeeded bool `json:"succeeded"`
SessionID string `json:"session_id,omitempty"`
TaskDesc string `json:"task_description,omitempty"`
}
OutcomeEvent records task outcome signals.
type Persona ¶
type Persona struct {
// Name identifies this persona for logging
Name string `json:"name"`
// Description is passed to the LLM to guide behavior
Description string `json:"description"`
// Goals are what the persona is trying to accomplish
Goals []string `json:"goals"`
// Constraints limit how the persona behaves
Constraints []string `json:"constraints"`
// FeedbackStyle influences how the persona rates memories
// Options: "generous", "critical", "realistic", "random"
FeedbackStyle string `json:"feedback_style"`
// SuccessRate is the probability tasks succeed (0.0-1.0)
// Used for outcome signal generation
SuccessRate float64 `json:"success_rate"`
}
Persona defines the synthetic user's characteristics and goals. These influence how the agent interacts with contextd.
type Runner ¶
type Runner struct {
// contains filtered or unexported fields
}
Runner executes test scenarios.
func NewRunner ¶
func NewRunner(cfg RunnerConfig) (*Runner, error)
NewRunner creates a new scenario runner.
func (*Runner) RunScenario ¶
RunScenario executes a single scenario and returns results.
func (*Runner) RunScenarios ¶
RunScenarios executes multiple scenarios and aggregates results.
type RunnerConfig ¶
type RunnerConfig struct {
Client ContextdClient
LLM LLMClient
Logger *zap.Logger
}
RunnerConfig configures a Runner.
type Scenario ¶
type Scenario struct {
// Name identifies the scenario
Name string `json:"name"`
// Description explains what this scenario tests
Description string `json:"description"`
// Persona to use for this scenario
Persona Persona `json:"persona"`
// ProjectID for contextd operations
ProjectID string `json:"project_id"`
// MaxTurns limits the conversation length
MaxTurns int `json:"max_turns"`
// Actions defines the sequence of actions to take
// If empty, agent decides autonomously via LLM
Actions []Action `json:"actions,omitempty"`
// Assertions to check after scenario completes
Assertions []Assertion `json:"assertions"`
}
Scenario defines a test scenario to run.
func GenerateScenarioFromStats ¶
func GenerateScenarioFromStats(stats *ConversationStats) *Scenario
GenerateScenarioFromStats creates a test scenario from conversation statistics.
type Session ¶
type Session struct {
ID string `json:"id"`
Persona Persona `json:"persona"`
ProjectID string `json:"project_id"`
StartTime time.Time `json:"start_time"`
EndTime time.Time `json:"end_time"`
Turns []Turn `json:"turns"`
Feedback []FeedbackEvent `json:"feedback"`
Outcomes []OutcomeEvent `json:"outcomes"`
Metrics SessionMetrics `json:"metrics"`
}
Session represents a complete test session.
type SessionMetrics ¶
type SessionMetrics struct {
MemoriesRecorded int `json:"memories_recorded"`
MemoriesRetrieved int `json:"memories_retrieved"`
FeedbackGiven int `json:"feedback_given"`
PositiveFeedback int `json:"positive_feedback"`
OutcomesRecorded int `json:"outcomes_recorded"`
SuccessfulOutcomes int `json:"successful_outcomes"`
AvgConfidenceDelta float64 `json:"avg_confidence_delta"`
}
SessionMetrics captures test results.
type TestResult ¶
type TestResult struct {
Scenario string `json:"scenario"`
Passed bool `json:"passed"`
Session *Session `json:"session"`
Assertions []AssertResult `json:"assertions"`
Error string `json:"error,omitempty"`
Duration time.Duration `json:"duration"`
}
TestResult captures the outcome of running a scenario.
type ToolCall ¶
type ToolCall struct {
Name string `json:"name"`
Args map[string]interface{} `json:"args"`
Result interface{} `json:"result,omitempty"`
Error string `json:"error,omitempty"`
}
ToolCall represents an MCP tool invocation.
type ToolUseBlock ¶
type ToolUseBlock struct {
ID string `json:"id"`
Name string `json:"name"`
Input json.RawMessage `json:"input"`
}
ToolUseBlock represents a tool use in content.