agent

package
v0.3.4 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 14, 2026 | License: GPL-3.0 | Imports: 12 | Imported by: 0

Documentation

Overview

Package agent provides test agent functionality for contextd validation.

Package agent provides a synthetic user agent for testing contextd's Bayesian confidence system. It simulates realistic user interactions including memory recording, searching, feedback, and outcome reporting.

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func AnalyzeConversations

func AnalyzeConversations(stats []*ConversationStats) map[string]interface{}

AnalyzeConversations provides aggregate statistics across conversations.

Types

type Action

// Action represents a specific action in a scenario.
//
// Type selects the kind of step; the inline comment on the field lists
// "record", "search", "feedback" and "outcome". Args carries the step's
// arguments as a free-form JSON object. The keys expected for each Type are
// not visible in this listing — TODO confirm against the runner.
type Action struct {
	Type string                 `json:"type"` // "record", "search", "feedback", "outcome"
	Args map[string]interface{} `json:"args"`
}

Action represents a specific action in a scenario.

type Agent

// Agent is a synthetic user agent for testing contextd.
//
// No exported fields are shown in this listing; create an instance with
// New(cfg Config) and interact with it through its methods.
type Agent struct {
	// contains filtered or unexported fields
}

Agent is a synthetic user agent for testing contextd.

func New

func New(cfg Config) (*Agent, error)

New creates a new test agent.

func (*Agent) GenerateFeedbackDecision

func (a *Agent) GenerateFeedbackDecision(memory MemoryResult) bool

GenerateFeedbackDecision decides if a memory was helpful based on persona.

func (*Agent) GenerateOutcome

func (a *Agent) GenerateOutcome() bool

GenerateOutcome decides if a task succeeded based on persona's success rate.

func (*Agent) GetConfidenceHistory

func (a *Agent) GetConfidenceHistory(memoryID string) []float64

GetConfidenceHistory returns confidence tracking for a memory.

func (*Agent) GetSession

func (a *Agent) GetSession() *Session

GetSession returns the current session state.

func (*Agent) GiveFeedback

func (a *Agent) GiveFeedback(ctx context.Context, memoryID string, helpful bool, reasoning string) (float64, error)

GiveFeedback provides feedback on a memory.

func (*Agent) RecordMemory

func (a *Agent) RecordMemory(ctx context.Context, title, content, outcome string, tags []string) (string, error)

RecordMemory records a new memory and tracks confidence.

func (*Agent) ReportOutcome

func (a *Agent) ReportOutcome(ctx context.Context, memoryID string, succeeded bool, taskDesc string) (float64, error)

ReportOutcome reports a task outcome for a memory.

func (*Agent) SearchMemories

func (a *Agent) SearchMemories(ctx context.Context, query string, limit int) ([]MemoryResult, error)

SearchMemories searches for relevant memories.

func (*Agent) ShouldGiveFeedback

func (a *Agent) ShouldGiveFeedback() bool

ShouldGiveFeedback decides if feedback should be given based on persona.

type AssertResult

// AssertResult captures the outcome of an individual assertion.
//
// Assertion embeds (by value) the Assertion this result refers to and Passed
// reports whether it held. Actual and Message are tagged omitempty, so they
// are left out of the JSON form when nil / empty.
// NOTE(review): Actual presumably holds the observed value that was compared
// against Assertion.Value — confirm against the runner.
type AssertResult struct {
	Assertion Assertion   `json:"assertion"`
	Passed    bool        `json:"passed"`
	Actual    interface{} `json:"actual,omitempty"`
	Message   string      `json:"message,omitempty"`
}

AssertResult captures individual assertion outcomes.

type Assertion

// Assertion defines an expected outcome.
//
// Only Type is always serialized; Target, Value and Message are tagged
// omitempty and disappear from the JSON form when empty. Value is untyped
// (interface{}); which concrete type each assertion Type expects is not
// visible in this listing — TODO confirm.
type Assertion struct {
	// Type of assertion
	// Options: "confidence_increased", "confidence_decreased",
	//          "confidence_above", "confidence_below",
	//          "memory_count", "weight_shifted"
	Type string `json:"type"`

	// Target identifies what to check (e.g., memory_id)
	Target string `json:"target,omitempty"`

	// Value for comparison
	Value interface{} `json:"value,omitempty"`

	// Message to show on failure
	Message string `json:"message,omitempty"`
}

Assertion defines an expected outcome.

type Config

// Config configures an Agent and is the sole argument to New.
//
// It bundles the contextd client, the LLM client, the Persona to simulate,
// the project ID and a zap logger.
// NOTE(review): New returns an error, so some fields are presumably
// validated; which ones are required is not visible here — confirm.
type Config struct {
	Client    ContextdClient
	LLM       LLMClient
	Persona   Persona
	ProjectID string
	Logger    *zap.Logger
}

Config configures an Agent.

type ContentBlock

// ContentBlock represents a content block in assistant messages.
//
// Only Type is always serialized; every other field is tagged omitempty.
// Input is a json.RawMessage, so the tool input stays as undecoded JSON.
// NOTE(review): the struct carries both a nested ToolUse block and flat
// ID/Name/Input fields; presumably this accepts two layouts of tool-use
// content — confirm which one the parser actually reads.
type ContentBlock struct {
	Type    string          `json:"type"`
	Text    string          `json:"text,omitempty"`
	ToolUse *ToolUseBlock   `json:"tool_use,omitempty"`
	ID      string          `json:"id,omitempty"`
	Name    string          `json:"name,omitempty"`
	Input   json.RawMessage `json:"input,omitempty"`
}

ContentBlock represents a content block in assistant messages.

type ContextdClient

// ContextdClient defines the interface for interacting with contextd.
// This allows mocking for unit tests; MockContextdClient declares a method
// for each signature below.
//
// Every method takes a context.Context first and returns an error last.
// MemoryFeedback and MemoryOutcome return a float64, and MemoryRecord returns
// a string plus a float64.
// NOTE(review): the float64 is presumably the memory's confidence after the
// call and the string the new memory's ID — confirm against the implementation.
type ContextdClient interface {
	// Memory operations
	MemoryRecord(ctx context.Context, projectID, title, content, outcome string, tags []string) (string, float64, error)
	MemorySearch(ctx context.Context, projectID, query string, limit int) ([]MemoryResult, error)
	MemoryFeedback(ctx context.Context, memoryID string, helpful bool) (float64, error)
	MemoryOutcome(ctx context.Context, memoryID string, succeeded bool, sessionID string) (float64, error)

	// For observing state
	GetMemory(ctx context.Context, memoryID string) (*MemoryResult, error)
}

ContextdClient defines the interface for interacting with contextd. This allows mocking for unit tests.

type ContextdToolCall

// ContextdToolCall represents a contextd MCP tool invocation.
//
// The struct has no JSON tags, so encoding/json would use the Go field names
// as keys. ConversationStats stores these in its ContextdToolCalls slice.
// NOTE(review): Success/Output presumably mirror the tool result recorded in
// the conversation log — confirm against ParseConversation.
type ContextdToolCall struct {
	Timestamp time.Time
	SessionID string
	Tool      string // memory_search, memory_record, memory_feedback, etc.
	Input     map[string]interface{}
	Success   bool
	Output    string
}

ContextdToolCall represents a contextd MCP tool invocation.

type ConversationEntry

// ConversationEntry represents a single entry in a Claude Code JSONL export.
//
// JSON keys are camelCase (parentUuid, sessionId, gitBranch, toolUseResult).
// ParentUUID is a *string, so a JSON null decodes to nil rather than "".
// Message and ToolResult are json.RawMessage and stay undecoded until a
// caller unmarshals them. Timestamp is a time.Time, which encoding/json
// parses from an RFC 3339 string.
type ConversationEntry struct {
	Type       string          `json:"type"`
	UUID       string          `json:"uuid"`
	ParentUUID *string         `json:"parentUuid"`
	SessionID  string          `json:"sessionId"`
	Timestamp  time.Time       `json:"timestamp"`
	Message    json.RawMessage `json:"message"`
	UserType   string          `json:"userType"`
	GitBranch  string          `json:"gitBranch"`
	CWD        string          `json:"cwd"`
	Summary    string          `json:"summary,omitempty"`
	ToolResult json.RawMessage `json:"toolUseResult,omitempty"`
}

ConversationEntry represents a single entry in a Claude Code JSONL export.

type ConversationStats

// ConversationStats holds statistics about a conversation.
//
// Values of this type are returned by ParseConversation and
// ParseConversationsDir and consumed by AnalyzeConversations and
// GenerateScenarioFromStats. The struct has no JSON tags.
// NOTE(review): Duration is presumably EndTime minus StartTime, and the
// Memory*/Checkpoint* counters presumably tally entries of
// ContextdToolCalls by tool name — confirm against the parser.
type ConversationStats struct {
	SessionID         string
	StartTime         time.Time
	EndTime           time.Time
	Duration          time.Duration
	UserMessages      int
	AssistantMessages int
	ToolCalls         int
	ContextdToolCalls []ContextdToolCall
	MemorySearches    int
	MemoryRecords     int
	MemoryFeedbacks   int
	CheckpointSaves   int
	CheckpointResumes int
	Errors            int
}

ConversationStats holds statistics about a conversation.

func ParseConversation

func ParseConversation(path string) (*ConversationStats, error)

ParseConversation parses a JSONL conversation file.

func ParseConversationsDir

func ParseConversationsDir(dir string) ([]*ConversationStats, error)

ParseConversationsDir parses all JSONL files in a directory.

type FeedbackEvent

// FeedbackEvent records when feedback was given.
//
// It pairs a timestamp and memory ID with the helpful/unhelpful verdict.
// Reasoning is optional and omitted from JSON when empty. Session collects
// these in its Feedback slice.
type FeedbackEvent struct {
	Timestamp time.Time `json:"timestamp"`
	MemoryID  string    `json:"memory_id"`
	Helpful   bool      `json:"helpful"`
	Reasoning string    `json:"reasoning,omitempty"`
}

FeedbackEvent records when feedback was given.

type LLMClient

// LLMClient defines the interface for LLM interactions.
// Allows swapping Claude for other models or mocks.
//
// Generate returns the model's reply as a string. GenerateStructured
// additionally takes a schema and returns an untyped value.
// NOTE(review): the concrete type expected for schema and returned as the
// result is not specified in this listing — confirm with an implementation.
type LLMClient interface {
	Generate(ctx context.Context, systemPrompt, userPrompt string) (string, error)
	GenerateStructured(ctx context.Context, systemPrompt, userPrompt string, schema interface{}) (interface{}, error)
}

LLMClient defines the interface for LLM interactions. Allows swapping Claude for other models or mocks.

type MemoryResult

// MemoryResult represents a memory returned from contextd.
//
// It is the element type returned by ContextdClient.MemorySearch and the
// pointee returned by ContextdClient.GetMemory. No field uses omitempty, so
// all keys are always emitted; a nil Tags slice encodes as JSON null.
// NOTE(review): Confidence is presumably in the 0.0-1.0 range — confirm.
type MemoryResult struct {
	ID         string   `json:"id"`
	Title      string   `json:"title"`
	Content    string   `json:"content"`
	Outcome    string   `json:"outcome"`
	Confidence float64  `json:"confidence"`
	Tags       []string `json:"tags"`
}

MemoryResult represents a memory returned from contextd.

type MessageContent

// MessageContent represents the content of a message.
//
// Model is omitted from JSON when empty.
// NOTE(review): Content is an interface{}; encoding/json decodes a JSON
// string into string and a JSON array into []interface{}, not []ContentBlock,
// so the block form needs a second decoding step — confirm how callers
// handle it.
type MessageContent struct {
	Role    string      `json:"role"`
	Content interface{} `json:"content"` // Can be string or []ContentBlock
	Model   string      `json:"model,omitempty"`
}

MessageContent represents the content of a message.

type MockContextdClient

// MockContextdClient is a mock implementation of ContextdClient for testing.
// It simulates the Bayesian confidence system behavior.
//
// No exported fields are shown in this listing; create one with
// NewMockContextdClient. Beyond the ContextdClient methods it offers
// GetSignalCount for inspecting a memory's signal count and Reset for
// clearing all state between tests.
type MockContextdClient struct {
	// contains filtered or unexported fields
}

MockContextdClient is a mock implementation of ContextdClient for testing. It simulates the Bayesian confidence system behavior.

func NewMockContextdClient

func NewMockContextdClient() *MockContextdClient

NewMockContextdClient creates a new mock client.

func (*MockContextdClient) GetMemory

func (m *MockContextdClient) GetMemory(ctx context.Context, memoryID string) (*MemoryResult, error)

GetMemory retrieves a memory by ID.

func (*MockContextdClient) GetSignalCount

func (m *MockContextdClient) GetSignalCount(memoryID string) int

GetSignalCount returns the number of signals for a memory.

func (*MockContextdClient) MemoryFeedback

func (m *MockContextdClient) MemoryFeedback(ctx context.Context, memoryID string, helpful bool) (float64, error)

MemoryFeedback provides feedback on a memory.

func (*MockContextdClient) MemoryOutcome

func (m *MockContextdClient) MemoryOutcome(ctx context.Context, memoryID string, succeeded bool, sessionID string) (float64, error)

MemoryOutcome reports a task outcome.

func (*MockContextdClient) MemoryRecord

func (m *MockContextdClient) MemoryRecord(ctx context.Context, projectID, title, content, outcome string, tags []string) (string, float64, error)

MemoryRecord creates a new memory.

func (*MockContextdClient) MemorySearch

func (m *MockContextdClient) MemorySearch(ctx context.Context, projectID, query string, limit int) ([]MemoryResult, error)

MemorySearch returns memories matching the query.

func (*MockContextdClient) Reset

func (m *MockContextdClient) Reset()

Reset clears all state (useful between tests).

type OutcomeEvent

// OutcomeEvent records task outcome signals.
//
// It pairs a timestamp and memory ID with whether the task succeeded.
// SessionID and TaskDesc are optional and omitted from JSON when empty; note
// that TaskDesc is serialized under the key "task_description". Session
// collects these in its Outcomes slice.
type OutcomeEvent struct {
	Timestamp time.Time `json:"timestamp"`
	MemoryID  string    `json:"memory_id"`
	Succeeded bool      `json:"succeeded"`
	SessionID string    `json:"session_id,omitempty"`
	TaskDesc  string    `json:"task_description,omitempty"`
}

OutcomeEvent records task outcome signals.

type Persona

// Persona defines the synthetic user's characteristics and goals.
// These influence how the agent interacts with contextd.
//
// Per the Agent method documentation, GenerateFeedbackDecision and
// ShouldGiveFeedback decide based on the persona, and GenerateOutcome uses
// the persona's success rate. A Persona is supplied through Config.Persona
// or Scenario.Persona.
type Persona struct {
	// Name identifies this persona for logging
	Name string `json:"name"`

	// Description is passed to the LLM to guide behavior
	Description string `json:"description"`

	// Goals are what the persona is trying to accomplish
	Goals []string `json:"goals"`

	// Constraints limit how the persona behaves
	Constraints []string `json:"constraints"`

	// FeedbackStyle influences how the persona rates memories
	// Options: "generous", "critical", "realistic", "random"
	FeedbackStyle string `json:"feedback_style"`

	// SuccessRate is the probability tasks succeed (0.0-1.0)
	// Used for outcome signal generation
	SuccessRate float64 `json:"success_rate"`
}

Persona defines the synthetic user's characteristics and goals. These influence how the agent interacts with contextd.

type Runner

// Runner executes test scenarios.
//
// No exported fields are shown in this listing; create one with
// NewRunner(cfg RunnerConfig), then call RunScenario for a single Scenario
// or RunScenarios for a batch.
type Runner struct {
	// contains filtered or unexported fields
}

Runner executes test scenarios.

func NewRunner

func NewRunner(cfg RunnerConfig) (*Runner, error)

NewRunner creates a new scenario runner.

func (*Runner) RunScenario

func (r *Runner) RunScenario(ctx context.Context, scenario Scenario) (*TestResult, error)

RunScenario executes a single scenario and returns results.

func (*Runner) RunScenarios

func (r *Runner) RunScenarios(ctx context.Context, scenarios []Scenario) ([]TestResult, error)

RunScenarios executes multiple scenarios and aggregates results.

type RunnerConfig

// RunnerConfig configures a Runner and is the sole argument to NewRunner.
//
// Unlike Config it carries no Persona or ProjectID; those are fields of each
// Scenario instead.
// NOTE(review): NewRunner returns an error, so some fields are presumably
// required — confirm which.
type RunnerConfig struct {
	Client ContextdClient
	LLM    LLMClient
	Logger *zap.Logger
}

RunnerConfig configures a Runner.

type Scenario

// Scenario defines a test scenario to run.
//
// Scenarios are executed by Runner.RunScenario / Runner.RunScenarios, and
// GenerateScenarioFromStats builds one from a ConversationStats. In the JSON
// form Actions is omitted when empty, while Assertions is always emitted
// (a nil slice encodes as null).
type Scenario struct {
	// Name identifies the scenario
	Name string `json:"name"`

	// Description explains what this scenario tests
	Description string `json:"description"`

	// Persona to use for this scenario
	Persona Persona `json:"persona"`

	// ProjectID for contextd operations
	ProjectID string `json:"project_id"`

	// MaxTurns limits the conversation length
	MaxTurns int `json:"max_turns"`

	// Actions defines the sequence of actions to take
	// If empty, agent decides autonomously via LLM
	Actions []Action `json:"actions,omitempty"`

	// Assertions to check after scenario completes
	Assertions []Assertion `json:"assertions"`
}

Scenario defines a test scenario to run.

func GenerateScenarioFromStats

func GenerateScenarioFromStats(stats *ConversationStats) *Scenario

GenerateScenarioFromStats creates a test scenario from conversation statistics.

type Session

// Session represents a complete test session.
//
// It records the persona and project in use, the start and end times, the
// conversation turns, every feedback and outcome event, and aggregate
// metrics. Agent.GetSession returns the current session, and TestResult
// carries a pointer to one. No field uses omitempty, so nil slices encode
// as JSON null.
type Session struct {
	ID        string          `json:"id"`
	Persona   Persona         `json:"persona"`
	ProjectID string          `json:"project_id"`
	StartTime time.Time       `json:"start_time"`
	EndTime   time.Time       `json:"end_time"`
	Turns     []Turn          `json:"turns"`
	Feedback  []FeedbackEvent `json:"feedback"`
	Outcomes  []OutcomeEvent  `json:"outcomes"`
	Metrics   SessionMetrics  `json:"metrics"`
}

Session represents a complete test session.

type SessionMetrics

// SessionMetrics captures test results.
//
// All fields except AvgConfidenceDelta are integer counters.
// NOTE(review): PositiveFeedback and SuccessfulOutcomes are presumably
// subsets of FeedbackGiven and OutcomesRecorded, and how AvgConfidenceDelta
// is averaged is not visible here — confirm against the agent.
type SessionMetrics struct {
	MemoriesRecorded   int     `json:"memories_recorded"`
	MemoriesRetrieved  int     `json:"memories_retrieved"`
	FeedbackGiven      int     `json:"feedback_given"`
	PositiveFeedback   int     `json:"positive_feedback"`
	OutcomesRecorded   int     `json:"outcomes_recorded"`
	SuccessfulOutcomes int     `json:"successful_outcomes"`
	AvgConfidenceDelta float64 `json:"avg_confidence_delta"`
}

SessionMetrics captures test results.

type TestResult

// TestResult captures the outcome of running a scenario.
//
// Scenario is a string, not a Scenario value (presumably the scenario's
// Name — confirm). Error is omitted from JSON when empty. Duration is a
// time.Duration, which encoding/json writes as an integer number of
// nanoseconds rather than a human-readable string.
type TestResult struct {
	Scenario   string         `json:"scenario"`
	Passed     bool           `json:"passed"`
	Session    *Session       `json:"session"`
	Assertions []AssertResult `json:"assertions"`
	Error      string         `json:"error,omitempty"`
	Duration   time.Duration  `json:"duration"`
}

TestResult captures the outcome of running a scenario.

type ToolCall

// ToolCall represents an MCP tool invocation.
//
// Name and Args are always serialized; Result and Error are tagged omitempty
// and dropped from JSON when nil / empty. Error is a plain string rather
// than an error value. Turn stores these in its ToolCalls slice.
type ToolCall struct {
	Name   string                 `json:"name"`
	Args   map[string]interface{} `json:"args"`
	Result interface{}            `json:"result,omitempty"`
	Error  string                 `json:"error,omitempty"`
}

ToolCall represents an MCP tool invocation.

type ToolUseBlock

// ToolUseBlock represents a tool use in content.
//
// ContentBlock references it through its ToolUse pointer. Input is a
// json.RawMessage, so the tool's arguments stay as undecoded JSON until a
// caller unmarshals them.
type ToolUseBlock struct {
	ID    string          `json:"id"`
	Name  string          `json:"name"`
	Input json.RawMessage `json:"input"`
}

ToolUseBlock represents a tool use in content.

type Turn

// Turn represents a single interaction in a conversation.
//
// Role is "user" or "assistant" per the inline comment. ToolCalls is
// omitted from JSON when empty. Session accumulates turns in its Turns
// slice.
type Turn struct {
	Timestamp time.Time  `json:"timestamp"`
	Role      string     `json:"role"` // "user" or "assistant"
	Content   string     `json:"content"`
	ToolCalls []ToolCall `json:"tool_calls,omitempty"`
}

Turn represents a single interaction in a conversation.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL