package agent

Version: v0.3.4 (latest) · Published: Jan 14, 2026 · License: GPL-3.0 · Imports: 12 · Imported by: 0

Documentation ¶

Overview ¶

Package agent provides test agent functionality for contextd validation.

Package agent provides a synthetic user agent for testing contextd's Bayesian confidence system. It simulates realistic user interactions including memory recording, searching, feedback, and outcome reporting.

Index ¶

func AnalyzeConversations(stats []*ConversationStats) map[string]interface{}
type Action
type Agent
- func New(cfg Config) (*Agent, error)
- func (a *Agent) GenerateFeedbackDecision(memory MemoryResult) bool
- func (a *Agent) GenerateOutcome() bool
- func (a *Agent) GetConfidenceHistory(memoryID string) []float64
- func (a *Agent) GetSession() *Session
- func (a *Agent) GiveFeedback(ctx context.Context, memoryID string, helpful bool, reasoning string) (float64, error)
- func (a *Agent) RecordMemory(ctx context.Context, title, content, outcome string, tags []string) (string, error)
- func (a *Agent) ReportOutcome(ctx context.Context, memoryID string, succeeded bool, taskDesc string) (float64, error)
- func (a *Agent) SearchMemories(ctx context.Context, query string, limit int) ([]MemoryResult, error)
- func (a *Agent) ShouldGiveFeedback() bool
type AssertResult
type Assertion
type Config
type ContentBlock
type ContextdClient
type ContextdToolCall
type ConversationEntry
type ConversationStats
- func ParseConversation(path string) (*ConversationStats, error)
- func ParseConversationsDir(dir string) ([]*ConversationStats, error)
type FeedbackEvent
type LLMClient
type MemoryResult
type MessageContent
type MockContextdClient
- func NewMockContextdClient() *MockContextdClient
- func (m *MockContextdClient) GetMemory(ctx context.Context, memoryID string) (*MemoryResult, error)
- func (m *MockContextdClient) GetSignalCount(memoryID string) int
- func (m *MockContextdClient) MemoryFeedback(ctx context.Context, memoryID string, helpful bool) (float64, error)
- func (m *MockContextdClient) MemoryOutcome(ctx context.Context, memoryID string, succeeded bool, sessionID string) (float64, error)
- func (m *MockContextdClient) MemoryRecord(ctx context.Context, projectID, title, content, outcome string, tags []string) (string, float64, error)
- func (m *MockContextdClient) MemorySearch(ctx context.Context, projectID, query string, limit int) ([]MemoryResult, error)
- func (m *MockContextdClient) Reset()
type OutcomeEvent
type Persona
type Runner
- func NewRunner(cfg RunnerConfig) (*Runner, error)
- func (r *Runner) RunScenario(ctx context.Context, scenario Scenario) (*TestResult, error)
- func (r *Runner) RunScenarios(ctx context.Context, scenarios []Scenario) ([]TestResult, error)
type RunnerConfig
type Scenario
- func GenerateScenarioFromStats(stats *ConversationStats) *Scenario
type Session
type SessionMetrics
type TestResult
type ToolCall
type ToolUseBlock
type Turn

Constants ¶

This section is empty.

Variables ¶

This section is empty.

Functions ¶

func AnalyzeConversations ¶

func AnalyzeConversations(stats []*ConversationStats) map[string]interface{}

AnalyzeConversations provides aggregate statistics across conversations.

Types ¶

type Action ¶

// Action represents a specific action in a scenario.
//
// Type selects the kind of action and Args carries its arguments as a
// free-form map.
// NOTE(review): the keys expected in Args for each Type are not visible
// here — confirm against the code that executes actions.
type Action struct {
	Type string                 `json:"type"` // "record", "search", "feedback", "outcome"
	Args map[string]interface{} `json:"args"`
}

Action represents a specific action in a scenario.

type Agent ¶

type Agent struct {
	// contains filtered or unexported fields
}

Agent is a synthetic user agent for testing contextd.

func New ¶

func New(cfg Config) (*Agent, error)

New creates a new test agent.

func (*Agent) GenerateFeedbackDecision ¶

func (a *Agent) GenerateFeedbackDecision(memory MemoryResult) bool

GenerateFeedbackDecision decides if a memory was helpful based on persona.

func (*Agent) GenerateOutcome ¶

func (a *Agent) GenerateOutcome() bool

GenerateOutcome decides if a task succeeded based on persona's success rate.

func (*Agent) GetConfidenceHistory ¶

func (a *Agent) GetConfidenceHistory(memoryID string) []float64

GetConfidenceHistory returns confidence tracking for a memory.

func (*Agent) GetSession ¶

func (a *Agent) GetSession() *Session

GetSession returns the current session state.

func (*Agent) GiveFeedback ¶

func (a *Agent) GiveFeedback(ctx context.Context, memoryID string, helpful bool, reasoning string) (float64, error)

GiveFeedback provides feedback on a memory.

func (*Agent) RecordMemory ¶

func (a *Agent) RecordMemory(ctx context.Context, title, content, outcome string, tags []string) (string, error)

RecordMemory records a new memory and tracks confidence.

func (*Agent) ReportOutcome ¶

func (a *Agent) ReportOutcome(ctx context.Context, memoryID string, succeeded bool, taskDesc string) (float64, error)

ReportOutcome reports a task outcome for a memory.

func (*Agent) SearchMemories ¶

func (a *Agent) SearchMemories(ctx context.Context, query string, limit int) ([]MemoryResult, error)

SearchMemories searches for relevant memories.

func (*Agent) ShouldGiveFeedback ¶

func (a *Agent) ShouldGiveFeedback() bool

ShouldGiveFeedback decides if feedback should be given based on persona.

type AssertResult ¶

// AssertResult captures individual assertion outcomes.
//
// Assertion is embedded by value as a named field, so the result carries
// the full assertion definition. Actual and Message are optional and are
// omitted from the JSON encoding when empty.
// NOTE(review): Actual presumably holds the observed value that was
// compared against Assertion.Value — confirm.
type AssertResult struct {
	Assertion Assertion   `json:"assertion"`
	Passed    bool        `json:"passed"`
	Actual    interface{} `json:"actual,omitempty"`
	Message   string      `json:"message,omitempty"`
}

AssertResult captures individual assertion outcomes.

type Assertion ¶

// Assertion defines an expected outcome.
//
// Only Type is always serialized; Target, Value and Message carry
// omitempty and are dropped from the JSON encoding when empty.
type Assertion struct {
	// Type of assertion
	// Options: "confidence_increased", "confidence_decreased",
	//          "confidence_above", "confidence_below",
	//          "memory_count", "weight_shifted"
	Type string `json:"type"`

	// Target identifies what to check (e.g., memory_id)
	Target string `json:"target,omitempty"`

	// Value for comparison
	Value interface{} `json:"value,omitempty"`

	// Message to show on failure
	Message string `json:"message,omitempty"`
}

Assertion defines an expected outcome.

type Config ¶

// Config configures an Agent.
//
// Client and LLM are interface-typed, so either can be replaced by a mock
// or an alternative implementation.
// NOTE(review): which fields New requires, and whether a nil Logger is
// tolerated, is not visible here — confirm against New.
type Config struct {
	Client    ContextdClient
	LLM       LLMClient
	Persona   Persona
	ProjectID string
	Logger    *zap.Logger
}

Config configures an Agent.

type ContentBlock ¶

// ContentBlock represents a content block in assistant messages.
//
// Every field except Type is optional in the JSON form (omitempty).
// ID, Name and Input have the same names and types as the fields of
// ToolUseBlock, which is also reachable through the ToolUse pointer.
// NOTE(review): presumably tool-use data may arrive either nested under
// "tool_use" or inline on the block itself — confirm against the parser.
type ContentBlock struct {
	Type    string          `json:"type"`
	Text    string          `json:"text,omitempty"`
	ToolUse *ToolUseBlock   `json:"tool_use,omitempty"`
	ID      string          `json:"id,omitempty"`
	Name    string          `json:"name,omitempty"`
	Input   json.RawMessage `json:"input,omitempty"`
}

ContentBlock represents a content block in assistant messages.

type ContextdClient ¶

// ContextdClient defines the interface for interacting with contextd.
// This allows mocking for unit tests.
//
// MemoryRecord and MemorySearch are scoped by a projectID; MemoryFeedback,
// MemoryOutcome and GetMemory address a single memory by memoryID.
// NOTE(review): the float64 results are presumably the memory's confidence
// after the call, and the string returned by MemoryRecord its new ID —
// confirm against an implementation.
type ContextdClient interface {
	// Memory operations
	MemoryRecord(ctx context.Context, projectID, title, content, outcome string, tags []string) (string, float64, error)
	MemorySearch(ctx context.Context, projectID, query string, limit int) ([]MemoryResult, error)
	MemoryFeedback(ctx context.Context, memoryID string, helpful bool) (float64, error)
	MemoryOutcome(ctx context.Context, memoryID string, succeeded bool, sessionID string) (float64, error)

	// For observing state
	GetMemory(ctx context.Context, memoryID string) (*MemoryResult, error)
}

ContextdClient defines the interface for interacting with contextd. This allows mocking for unit tests.

type ContextdToolCall ¶

// ContextdToolCall represents a contextd MCP tool invocation.
//
// The struct declares no JSON tags.
// NOTE(review): Input and Output presumably hold the tool's arguments and
// its raw result text, and Success whether the call completed without
// error — confirm against the conversation parser.
type ContextdToolCall struct {
	Timestamp time.Time
	SessionID string
	Tool      string // memory_search, memory_record, memory_feedback, etc.
	Input     map[string]interface{}
	Success   bool
	Output    string
}

ContextdToolCall represents a contextd MCP tool invocation.

type ConversationEntry ¶

// ConversationEntry represents a single entry in a Claude Code JSONL export.
//
// JSON keys are camelCase ("parentUuid", "sessionId", "toolUseResult", ...).
// Message and ToolResult are json.RawMessage, so their payloads are left
// undecoded when the entry itself is unmarshalled. ParentUUID is a pointer,
// so a null or absent "parentUuid" decodes to nil rather than "".
type ConversationEntry struct {
	Type       string          `json:"type"`
	UUID       string          `json:"uuid"`
	ParentUUID *string         `json:"parentUuid"`
	SessionID  string          `json:"sessionId"`
	Timestamp  time.Time       `json:"timestamp"`
	Message    json.RawMessage `json:"message"`
	UserType   string          `json:"userType"`
	GitBranch  string          `json:"gitBranch"`
	CWD        string          `json:"cwd"`
	Summary    string          `json:"summary,omitempty"`
	ToolResult json.RawMessage `json:"toolUseResult,omitempty"`
}

ConversationEntry represents a single entry in a Claude Code JSONL export.

type ConversationStats ¶

// ConversationStats holds statistics about a conversation.
//
// ContextdToolCalls keeps the individual contextd invocations; the
// remaining integer fields are plain counters.
// NOTE(review): the counters are presumably filled in by ParseConversation,
// with Duration derived from StartTime and EndTime — confirm in the parser.
type ConversationStats struct {
	SessionID         string
	StartTime         time.Time
	EndTime           time.Time
	Duration          time.Duration
	UserMessages      int
	AssistantMessages int
	ToolCalls         int
	ContextdToolCalls []ContextdToolCall
	MemorySearches    int
	MemoryRecords     int
	MemoryFeedbacks   int
	CheckpointSaves   int
	CheckpointResumes int
	Errors            int
}

ConversationStats holds statistics about a conversation.

func ParseConversation ¶

func ParseConversation(path string) (*ConversationStats, error)

ParseConversation parses a JSONL conversation file.

func ParseConversationsDir ¶

func ParseConversationsDir(dir string) ([]*ConversationStats, error)

ParseConversationsDir parses all JSONL files in a directory.

type FeedbackEvent ¶

// FeedbackEvent records when feedback was given.
//
// MemoryID, Helpful and Reasoning correspond by name to the parameters of
// Agent.GiveFeedback. Reasoning is optional and is omitted from the JSON
// encoding when empty.
type FeedbackEvent struct {
	Timestamp time.Time `json:"timestamp"`
	MemoryID  string    `json:"memory_id"`
	Helpful   bool      `json:"helpful"`
	Reasoning string    `json:"reasoning,omitempty"`
}

FeedbackEvent records when feedback was given.

type LLMClient ¶

// LLMClient defines the interface for LLM interactions.
// Allows swapping Claude for other models or mocks.
//
// Both methods take a system prompt and a user prompt; GenerateStructured
// additionally takes a schema and returns an untyped value.
// NOTE(review): the expected form of schema and the dynamic type of the
// returned value are not visible here — confirm against an implementation.
type LLMClient interface {
	Generate(ctx context.Context, systemPrompt, userPrompt string) (string, error)
	GenerateStructured(ctx context.Context, systemPrompt, userPrompt string, schema interface{}) (interface{}, error)
}

LLMClient defines the interface for LLM interactions. Allows swapping Claude for other models or mocks.

type MemoryResult ¶

// MemoryResult represents a memory returned from contextd.
//
// No field uses omitempty, so every key is present in the JSON encoding
// (a nil Tags slice is written as null).
// NOTE(review): Confidence is presumably in the range 0.0-1.0 — confirm.
type MemoryResult struct {
	ID         string   `json:"id"`
	Title      string   `json:"title"`
	Content    string   `json:"content"`
	Outcome    string   `json:"outcome"`
	Confidence float64  `json:"confidence"`
	Tags       []string `json:"tags"`
}

MemoryResult represents a memory returned from contextd.

type MessageContent ¶

// MessageContent represents the content of a message.
//
// Content is declared as interface{} because it has two possible shapes
// (see the field comment); code reading it must inspect the dynamic type.
// Model is optional and is omitted from the JSON encoding when empty.
type MessageContent struct {
	Role    string      `json:"role"`
	Content interface{} `json:"content"` // Can be string or []ContentBlock
	Model   string      `json:"model,omitempty"`
}

MessageContent represents the content of a message.

type MockContextdClient ¶

type MockContextdClient struct {
	// contains filtered or unexported fields
}

MockContextdClient is a mock implementation of ContextdClient for testing. It simulates the Bayesian confidence system behavior.

func NewMockContextdClient ¶

func NewMockContextdClient() *MockContextdClient

NewMockContextdClient creates a new mock client.

func (*MockContextdClient) GetMemory ¶

func (m *MockContextdClient) GetMemory(ctx context.Context, memoryID string) (*MemoryResult, error)

GetMemory retrieves a memory by ID.

func (*MockContextdClient) GetSignalCount ¶

func (m *MockContextdClient) GetSignalCount(memoryID string) int

GetSignalCount returns the number of signals for a memory.

func (*MockContextdClient) MemoryFeedback ¶

func (m *MockContextdClient) MemoryFeedback(ctx context.Context, memoryID string, helpful bool) (float64, error)

MemoryFeedback provides feedback on a memory.

func (*MockContextdClient) MemoryOutcome ¶

func (m *MockContextdClient) MemoryOutcome(ctx context.Context, memoryID string, succeeded bool, sessionID string) (float64, error)

MemoryOutcome reports a task outcome.

func (*MockContextdClient) MemoryRecord ¶

func (m *MockContextdClient) MemoryRecord(ctx context.Context, projectID, title, content, outcome string, tags []string) (string, float64, error)

MemoryRecord creates a new memory.

func (*MockContextdClient) MemorySearch ¶

func (m *MockContextdClient) MemorySearch(ctx context.Context, projectID, query string, limit int) ([]MemoryResult, error)

MemorySearch returns memories matching the query.

func (*MockContextdClient) Reset ¶

func (m *MockContextdClient) Reset()

Reset clears all state (useful between tests).

type OutcomeEvent ¶

// OutcomeEvent records task outcome signals.
//
// SessionID and TaskDesc are optional and are omitted from the JSON
// encoding when empty. Note that TaskDesc is serialized under the key
// "task_description", not "task_desc".
type OutcomeEvent struct {
	Timestamp time.Time `json:"timestamp"`
	MemoryID  string    `json:"memory_id"`
	Succeeded bool      `json:"succeeded"`
	SessionID string    `json:"session_id,omitempty"`
	TaskDesc  string    `json:"task_description,omitempty"`
}

OutcomeEvent records task outcome signals.

type Persona ¶

// Persona defines the synthetic user's characteristics and goals.
// These influence how the agent interacts with contextd.
//
// No field uses omitempty, so all keys are always present in the JSON
// encoding.
type Persona struct {
	// Name identifies this persona for logging
	Name string `json:"name"`

	// Description is passed to the LLM to guide behavior
	Description string `json:"description"`

	// Goals are what the persona is trying to accomplish
	Goals []string `json:"goals"`

	// Constraints limit how the persona behaves
	Constraints []string `json:"constraints"`

	// FeedbackStyle influences how the persona rates memories
	// Options: "generous", "critical", "realistic", "random"
	FeedbackStyle string `json:"feedback_style"`

	// SuccessRate is the probability tasks succeed (0.0-1.0)
	// Used for outcome signal generation
	SuccessRate float64 `json:"success_rate"`
}

Persona defines the synthetic user's characteristics and goals. These influence how the agent interacts with contextd.

type Runner ¶

type Runner struct {
	// contains filtered or unexported fields
}

Runner executes test scenarios.

func NewRunner ¶

func NewRunner(cfg RunnerConfig) (*Runner, error)

NewRunner creates a new scenario runner.

func (*Runner) RunScenario ¶

func (r *Runner) RunScenario(ctx context.Context, scenario Scenario) (*TestResult, error)

RunScenario executes a single scenario and returns results.

func (*Runner) RunScenarios ¶

func (r *Runner) RunScenarios(ctx context.Context, scenarios []Scenario) ([]TestResult, error)

RunScenarios executes multiple scenarios and aggregates results.

type RunnerConfig ¶

// RunnerConfig configures a Runner.
//
// It has the same Client, LLM and Logger fields as Config but no Persona
// or ProjectID; Scenario declares its own Persona and ProjectID fields.
type RunnerConfig struct {
	Client ContextdClient
	LLM    LLMClient
	Logger *zap.Logger
}

RunnerConfig configures a Runner.

type Scenario ¶

// Scenario defines a test scenario to run.
//
// Actions is the only field with omitempty; all other keys are always
// present in the JSON encoding.
type Scenario struct {
	// Name identifies the scenario
	Name string `json:"name"`

	// Description explains what this scenario tests
	Description string `json:"description"`

	// Persona to use for this scenario
	Persona Persona `json:"persona"`

	// ProjectID for contextd operations
	ProjectID string `json:"project_id"`

	// MaxTurns limits the conversation length
	MaxTurns int `json:"max_turns"`

	// Actions defines the sequence of actions to take
	// If empty, agent decides autonomously via LLM
	Actions []Action `json:"actions,omitempty"`

	// Assertions to check after scenario completes
	Assertions []Assertion `json:"assertions"`
}

Scenario defines a test scenario to run.

func GenerateScenarioFromStats ¶

func GenerateScenarioFromStats(stats *ConversationStats) *Scenario

GenerateScenarioFromStats creates a test scenario from conversation statistics.

type Session ¶

// Session represents a complete test session.
//
// It groups the Persona and ProjectID with the recorded Turns, the
// Feedback and Outcomes event lists, and the aggregate Metrics. All
// nested values are stored by value, not by pointer.
type Session struct {
	ID        string          `json:"id"`
	Persona   Persona         `json:"persona"`
	ProjectID string          `json:"project_id"`
	StartTime time.Time       `json:"start_time"`
	EndTime   time.Time       `json:"end_time"`
	Turns     []Turn          `json:"turns"`
	Feedback  []FeedbackEvent `json:"feedback"`
	Outcomes  []OutcomeEvent  `json:"outcomes"`
	Metrics   SessionMetrics  `json:"metrics"`
}

Session represents a complete test session.

type SessionMetrics ¶

// SessionMetrics captures test results.
//
// NOTE(review): PositiveFeedback and SuccessfulOutcomes are presumably
// subsets of FeedbackGiven and OutcomesRecorded respectively, and how
// AvgConfidenceDelta is averaged is not visible here — confirm against
// the code that updates these counters.
type SessionMetrics struct {
	MemoriesRecorded   int     `json:"memories_recorded"`
	MemoriesRetrieved  int     `json:"memories_retrieved"`
	FeedbackGiven      int     `json:"feedback_given"`
	PositiveFeedback   int     `json:"positive_feedback"`
	OutcomesRecorded   int     `json:"outcomes_recorded"`
	SuccessfulOutcomes int     `json:"successful_outcomes"`
	AvgConfidenceDelta float64 `json:"avg_confidence_delta"`
}

SessionMetrics captures test results.

type TestResult ¶

// TestResult captures the outcome of running a scenario.
//
// Scenario is a string, not a Scenario value (presumably the scenario's
// Name — confirm). Session is a pointer and may therefore be nil. Error is
// omitted from the JSON encoding when empty. Duration is a time.Duration,
// which encoding/json writes as an integer number of nanoseconds.
type TestResult struct {
	Scenario   string         `json:"scenario"`
	Passed     bool           `json:"passed"`
	Session    *Session       `json:"session"`
	Assertions []AssertResult `json:"assertions"`
	Error      string         `json:"error,omitempty"`
	Duration   time.Duration  `json:"duration"`
}

TestResult captures the outcome of running a scenario.

type ToolCall ¶

// ToolCall represents an MCP tool invocation.
//
// Result and Error are optional and are omitted from the JSON encoding
// when empty. Error is a plain string rather than an error value, which
// keeps the struct directly serializable.
type ToolCall struct {
	Name   string                 `json:"name"`
	Args   map[string]interface{} `json:"args"`
	Result interface{}            `json:"result,omitempty"`
	Error  string                 `json:"error,omitempty"`
}

ToolCall represents an MCP tool invocation.

type ToolUseBlock ¶

// ToolUseBlock represents a tool use in content.
//
// Input is json.RawMessage, so the tool's arguments are left undecoded
// here and can be unmarshalled later into a tool-specific type.
type ToolUseBlock struct {
	ID    string          `json:"id"`
	Name  string          `json:"name"`
	Input json.RawMessage `json:"input"`
}

ToolUseBlock represents a tool use in content.

type Turn ¶

// Turn represents a single interaction in a conversation.
//
// ToolCalls is optional and is omitted from the JSON encoding when empty.
type Turn struct {
	Timestamp time.Time  `json:"timestamp"`
	Role      string     `json:"role"` // "user" or "assistant"
	Content   string     `json:"content"`
	ToolCalls []ToolCall `json:"tool_calls,omitempty"`
}

Turn represents a single interaction in a conversation.

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL