Documentation
¶
Overview ¶
Package framework provides the integration test harness for contextd.
The test harness simulates developers using contextd MCP tools to validate cross-session memory, checkpoint persistence, and secret scrubbing. It uses a mock vector store for deterministic testing while leveraging real service implementations for ReasoningBank, Checkpoint, and Secrets.
Key components:
- Developer: Simulates a developer using contextd tools
- SharedStore: Enables cross-developer knowledge sharing tests
- TestHarness: Provides setup/teardown helpers for test isolation
Known limitation: The mock store does not test semantic similarity. See docs/spec/test-harness/KNOWN-GAPS.md for details.
Package framework provides the integration test harness for contextd.
Index ¶
- func AllPassed(results []AssertionResult) bool
- func CheckpointResumeActivity(ctx context.Context, input CheckpointResumeInput) error
- func CheckpointSaveActivity(ctx context.Context, input CheckpointSaveInput) (string, error)
- func ClearContextActivity(ctx context.Context, handle ContextdHandle) error
- func RecordMemoryActivity(ctx context.Context, input RecordMemoryInput) (string, error)
- func StopContextdActivity(ctx context.Context, handle ContextdHandle) error
- func SummaryMessage(results []AssertionResult) string
- func ValidateSessionStep(step SessionStep) string
- type Activities
- func (a *Activities) CheckpointResumeActivity(ctx context.Context, input CheckpointResumeInput) error
- func (a *Activities) CheckpointSaveActivity(ctx context.Context, input CheckpointSaveInput) (string, error)
- func (a *Activities) ClearContextActivity(ctx context.Context, handle ContextdHandle) error
- func (a *Activities) GiveFeedbackActivity(ctx context.Context, handle ContextdHandle, memoryID string, helpful bool, reasoning string) error
- func (a *Activities) RecordMemoryActivity(ctx context.Context, input RecordMemoryInput) (string, error)
- func (a *Activities) SearchMemoryActivity(ctx context.Context, input SearchMemoryInput) ([]MemoryResult, error)
- func (a *Activities) StartContextdActivity(ctx context.Context, config DeveloperConfig) (ContextdHandle, error)
- func (a *Activities) StopContextdActivity(ctx context.Context, handle ContextdHandle) error
- type AssertionResult
- func EvaluateAssertionSet(set AssertionSet, result *SessionResult) []AssertionResult
- func EvaluateBehavioralAssertion(assertion BehavioralAssertion, result *SessionResult) AssertionResult
- func EvaluateBinaryAssertion(assertion BinaryAssertion, result *SessionResult) AssertionResult
- func EvaluateThresholdAssertion(assertion ThresholdAssertion, result *SessionResult) AssertionResult
- func FailedAssertions(results []AssertionResult) []AssertionResult
- type AssertionSet
- type BehavioralAssertion
- type BinaryAssertion
- type CheckpointResult
- type CheckpointResumeInput
- type CheckpointSaveInput
- type CheckpointSaveRequest
- type ContextdHandle
- type Developer
- func (d *Developer) GiveFeedback(ctx context.Context, memoryID string, helpful bool, reasoning string) error
- func (d *Developer) ID() string
- func (d *Developer) IsContextdRunning() bool
- func (d *Developer) ListCheckpoints(ctx context.Context, limit int) ([]CheckpointResult, error)
- func (d *Developer) RecordMemory(ctx context.Context, record MemoryRecord) (string, error)
- func (d *Developer) ResumeCheckpoint(ctx context.Context, checkpointID string) (*CheckpointResult, error)
- func (d *Developer) SaveCheckpoint(ctx context.Context, req CheckpointSaveRequest) (string, error)
- func (d *Developer) SearchMemory(ctx context.Context, query string, limit int) ([]MemoryResult, error)
- func (d *Developer) SessionID() string
- func (d *Developer) SessionStats() SessionStats
- func (d *Developer) SetSessionID(sessionID string)
- func (d *Developer) StartContextd(ctx context.Context) error
- func (d *Developer) StopContextd(ctx context.Context) error
- func (d *Developer) TenantID() string
- type DeveloperConfig
- type MemoryRecord
- type MemoryResult
- type MetricStats
- type RecordMemoryInput
- type SearchMemoryInput
- type SessionConfig
- type SessionResult
- type SessionStats
- type SessionStep
- type SharedStore
- type SharedStoreConfig
- type SuiteResult
- type TestConfig
- type TestHarness
- type TestMetrics
- func (m *TestMetrics) GetStats() MetricStats
- func (m *TestMetrics) RecordCheckpointLoad(ctx context.Context, latency time.Duration, success bool)
- func (m *TestMetrics) RecordCheckpointSave(ctx context.Context, latency time.Duration)
- func (m *TestMetrics) RecordConfidenceScore(ctx context.Context, span trace.Span, score float64)
- func (m *TestMetrics) RecordMemorySearch(ctx context.Context, latency time.Duration, hit bool, crossDev bool)
- func (m *TestMetrics) RecordSuiteDuration(ctx context.Context, suiteName string, duration time.Duration)
- func (m *TestMetrics) RecordTestFail(ctx context.Context, suite, testName string)
- func (m *TestMetrics) RecordTestPass(ctx context.Context, suite, testName string)
- func (m *TestMetrics) Reset()
- func (m *TestMetrics) StartPhaseSpan(ctx context.Context, phase string) (context.Context, trace.Span)
- func (m *TestMetrics) StartSuiteSpan(ctx context.Context, suiteName string) (context.Context, trace.Span)
- func (m *TestMetrics) StartTestSpan(ctx context.Context, testName string) (context.Context, trace.Span)
- type TestReport
- type TestResult
- type ThresholdAssertion
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func AllPassed ¶
func AllPassed(results []AssertionResult) bool
AllPassed returns true if all assertion results passed.
func CheckpointResumeActivity ¶
func CheckpointResumeActivity(ctx context.Context, input CheckpointResumeInput) error
CheckpointResumeActivity is the activity function signature for resuming from a checkpoint.
func CheckpointSaveActivity ¶
func CheckpointSaveActivity(ctx context.Context, input CheckpointSaveInput) (string, error)
CheckpointSaveActivity is the activity function signature for saving a checkpoint.
func ClearContextActivity ¶
func ClearContextActivity(ctx context.Context, handle ContextdHandle) error
ClearContextActivity is the activity function signature for clearing context.
func RecordMemoryActivity ¶
func RecordMemoryActivity(ctx context.Context, input RecordMemoryInput) (string, error)
RecordMemoryActivity is the activity function signature for recording a memory.
func StopContextdActivity ¶
func StopContextdActivity(ctx context.Context, handle ContextdHandle) error
StopContextdActivity is the activity function signature for stopping contextd.
func SummaryMessage ¶
func SummaryMessage(results []AssertionResult) string
SummaryMessage creates a human-readable summary of assertion results.
func ValidateSessionStep ¶
func ValidateSessionStep(step SessionStep) string
ValidateSessionStep validates that a SessionStep has all required fields for its type. It returns an actionable error message if validation fails, or an empty string if the step is valid.
Types ¶
type Activities ¶
type Activities struct {
// contains filtered or unexported fields
}
Activities encapsulates all test framework activities. This allows proper dependency injection for testing.
func NewActivities ¶
func NewActivities(sharedStore *SharedStore) *Activities
NewActivities creates a new Activities instance.
func (*Activities) CheckpointResumeActivity ¶
func (a *Activities) CheckpointResumeActivity(ctx context.Context, input CheckpointResumeInput) error
CheckpointResumeActivity resumes from a checkpoint.
func (*Activities) CheckpointSaveActivity ¶
func (a *Activities) CheckpointSaveActivity(ctx context.Context, input CheckpointSaveInput) (string, error)
CheckpointSaveActivity saves a checkpoint.
func (*Activities) ClearContextActivity ¶
func (a *Activities) ClearContextActivity(ctx context.Context, handle ContextdHandle) error
ClearContextActivity clears the current context (simulates /clear). In the test harness, this is a no-op since we don't simulate LLM context.
func (*Activities) GiveFeedbackActivity ¶
func (a *Activities) GiveFeedbackActivity(ctx context.Context, handle ContextdHandle, memoryID string, helpful bool, reasoning string) error
GiveFeedbackActivity gives feedback on a memory.
func (*Activities) RecordMemoryActivity ¶
func (a *Activities) RecordMemoryActivity(ctx context.Context, input RecordMemoryInput) (string, error)
RecordMemoryActivity records a memory via contextd.
func (*Activities) SearchMemoryActivity ¶
func (a *Activities) SearchMemoryActivity(ctx context.Context, input SearchMemoryInput) ([]MemoryResult, error)
SearchMemoryActivity searches for memories via contextd.
func (*Activities) StartContextdActivity ¶
func (a *Activities) StartContextdActivity(ctx context.Context, config DeveloperConfig) (ContextdHandle, error)
StartContextdActivity starts a contextd instance for a developer.
func (*Activities) StopContextdActivity ¶
func (a *Activities) StopContextdActivity(ctx context.Context, handle ContextdHandle) error
StopContextdActivity stops a contextd instance.
type AssertionResult ¶
type AssertionResult struct {
Type string // "binary", "threshold", "behavioral"
Check string // What was checked
Passed bool // Whether assertion passed
Message string // Human-readable explanation
ActualValue float64 // For threshold assertions
}
AssertionResult contains the result of evaluating an assertion.
func EvaluateAssertionSet ¶
func EvaluateAssertionSet(set AssertionSet, result *SessionResult) []AssertionResult
EvaluateAssertionSet evaluates all assertions in a set.
func EvaluateBehavioralAssertion ¶
func EvaluateBehavioralAssertion(assertion BehavioralAssertion, result *SessionResult) AssertionResult
EvaluateBehavioralAssertion evaluates a behavioral assertion against session results.
func EvaluateBinaryAssertion ¶
func EvaluateBinaryAssertion(assertion BinaryAssertion, result *SessionResult) AssertionResult
EvaluateBinaryAssertion evaluates a binary assertion against session results.
func EvaluateThresholdAssertion ¶
func EvaluateThresholdAssertion(assertion ThresholdAssertion, result *SessionResult) AssertionResult
EvaluateThresholdAssertion evaluates a threshold assertion against session results.
func FailedAssertions ¶
func FailedAssertions(results []AssertionResult) []AssertionResult
FailedAssertions returns only the failed assertions.
type AssertionSet ¶
type AssertionSet struct {
Binary []BinaryAssertion
Threshold []ThresholdAssertion
Behavioral []BehavioralAssertion
}
AssertionSet groups multiple assertions for a test.
type BehavioralAssertion ¶
type BehavioralAssertion struct {
Check string // What to check: "content_pattern", "ast_pattern"
Method string // How to check: "regex_match", "ast_pattern_match"
Patterns []string // Patterns that SHOULD be present
NegativePatterns []string // Patterns that should NOT be present
LLMJudge bool // Use LLM to evaluate if automated check insufficient
}
BehavioralAssertion is a pattern-based assertion.
type BinaryAssertion ¶
type BinaryAssertion struct {
Check string // What to check: "tool_called", "search_has_results", "file_exists"
Method string // How to check: "record_memory", "memory_search", etc.
Target string // Specific target if needed
}
BinaryAssertion is a pass/fail assertion.
type CheckpointResult ¶
type CheckpointResult struct {
ID string
Name string
Summary string
Context string
ProjectPath string
CreatedAt time.Time
}
CheckpointResult represents a checkpoint.
type CheckpointResumeInput ¶
type CheckpointResumeInput struct {
ContextdHandle ContextdHandle
CheckpointID string
}
CheckpointResumeInput is the input for CheckpointResumeActivity.
type CheckpointSaveInput ¶
type CheckpointSaveInput struct {
ContextdHandle ContextdHandle
Summary string
}
CheckpointSaveInput is the input for CheckpointSaveActivity.
type CheckpointSaveRequest ¶
CheckpointSaveRequest represents a request to save a checkpoint.
type ContextdHandle ¶
type ContextdHandle struct {
ID string
Developer DeveloperConfig
}
ContextdHandle represents a running contextd instance.
func StartContextdActivity ¶
func StartContextdActivity(ctx context.Context, config DeveloperConfig) (ContextdHandle, error)
StartContextdActivity is the activity function signature for starting contextd.
type Developer ¶
type Developer struct {
// contains filtered or unexported fields
}
Developer simulates a developer using contextd.
func NewDeveloper ¶
func NewDeveloper(cfg DeveloperConfig) (*Developer, error)
NewDeveloper creates a new developer simulator with its own isolated store.
func NewDeveloperWithStore ¶
func NewDeveloperWithStore(cfg DeveloperConfig, shared *SharedStore) (*Developer, error)
NewDeveloperWithStore creates a developer simulator using a shared store. This enables cross-developer knowledge sharing scenarios.
func (*Developer) GiveFeedback ¶
func (d *Developer) GiveFeedback(ctx context.Context, memoryID string, helpful bool, reasoning string) error
GiveFeedback gives feedback on a memory.
func (*Developer) IsContextdRunning ¶
IsContextdRunning returns whether contextd is running.
func (*Developer) ListCheckpoints ¶
ListCheckpoints lists checkpoints for this developer's session.
func (*Developer) RecordMemory ¶
RecordMemory records a memory via contextd. Content is automatically scrubbed for secrets before storage (simulates MCP layer).
func (*Developer) ResumeCheckpoint ¶
func (d *Developer) ResumeCheckpoint(ctx context.Context, checkpointID string) (*CheckpointResult, error)
ResumeCheckpoint resumes from a checkpoint.
func (*Developer) SaveCheckpoint ¶
SaveCheckpoint saves a checkpoint of the current session.
func (*Developer) SearchMemory ¶
func (d *Developer) SearchMemory(ctx context.Context, query string, limit int) ([]MemoryResult, error)
SearchMemory searches for memories via contextd. Results are automatically scrubbed for secrets (defense-in-depth, simulates MCP layer).
func (*Developer) SessionStats ¶
func (d *Developer) SessionStats() SessionStats
SessionStats returns the current session statistics.
func (*Developer) SetSessionID ¶
SetSessionID sets the session ID (for resuming sessions).
func (*Developer) StartContextd ¶
StartContextd starts the contextd services for this developer.
func (*Developer) StopContextd ¶
StopContextd stops the contextd services.
type DeveloperConfig ¶
type DeveloperConfig struct {
ID string
TenantID string
TeamID string
ProjectID string
Logger *zap.Logger
}
DeveloperConfig configures a simulated developer.
type MemoryRecord ¶
MemoryRecord represents a memory to record.
type MemoryResult ¶
MemoryResult represents a search result.
func SearchMemoryActivity ¶
func SearchMemoryActivity(ctx context.Context, input SearchMemoryInput) ([]MemoryResult, error)
SearchMemoryActivity is the activity function signature for searching memory.
type MetricStats ¶
type MetricStats struct {
MemoryHitCount int64
MemoryMissCount int64
MemoryHitRate float64
CheckpointSuccessCount int64
CheckpointFailureCount int64
CheckpointSuccessRate float64
CrossDevSearchCount int64
TotalSearchCount int64
CrossDevSearchRate float64
}
MetricStats holds a snapshot of metric statistics.
type RecordMemoryInput ¶
type RecordMemoryInput struct {
ContextdHandle ContextdHandle
Memory MemoryRecord
}
RecordMemoryInput is the input for RecordMemoryActivity.
type SearchMemoryInput ¶
type SearchMemoryInput struct {
ContextdHandle ContextdHandle
Query string
Limit int
}
SearchMemoryInput is the input for SearchMemoryActivity.
type SessionConfig ¶
type SessionConfig struct {
Developer DeveloperConfig
Steps []SessionStep
}
SessionConfig configures a developer session workflow.
type SessionResult ¶
type SessionResult struct {
Developer DeveloperConfig
MemoryIDs []string
SearchResults [][]MemoryResult
Checkpoints []string // Checkpoint IDs saved during session
Errors []string
}
SessionResult contains results from a developer session.
func DeveloperSessionWorkflow ¶
func DeveloperSessionWorkflow(ctx workflow.Context, session SessionConfig) (*SessionResult, error)
DeveloperSessionWorkflow simulates a developer using contextd.
type SessionStats ¶
type SessionStats struct {
MemoryRecords int
MemorySearches int
MemoryFeedbacks int
Checkpoints int
TotalToolCalls int
}
SessionStats tracks tool usage during a session.
type SessionStep ¶
type SessionStep struct {
Type string // "record_memory", "search_memory", "checkpoint_save", "checkpoint_resume", "clear_context"
Memory *MemoryRecord
Query string
Limit int
Summary string
CheckpointID string
}
SessionStep represents a step in a developer session.
type SharedStore ¶
type SharedStore struct {
// contains filtered or unexported fields
}
SharedStore represents a shared vector store that multiple developers can use. This simulates the production scenario where developers share a Qdrant backend.
func NewSharedStore ¶
func NewSharedStore(cfg SharedStoreConfig) (*SharedStore, error)
NewSharedStore creates a new shared store for cross-developer testing. Uses a mock store implementation that provides deterministic behavior for tests.
func (*SharedStore) Store ¶
func (s *SharedStore) Store() vectorstore.Store
Store returns the underlying vector store.
type SharedStoreConfig ¶
type SharedStoreConfig struct {
}
SharedStoreConfig configures a shared vector store for multiple developers.
type SuiteResult ¶
type SuiteResult struct {
SuiteName string
Tests []TestResult
Passed int
Failed int
Errors []string
}
SuiteResult contains results from a single test suite.
func BugfixLearningWorkflow ¶
func BugfixLearningWorkflow(ctx workflow.Context, config TestConfig) (*SuiteResult, error)
BugfixLearningWorkflow runs bug-fix learning tests.
func MultiSessionWorkflow ¶
func MultiSessionWorkflow(ctx workflow.Context, config TestConfig) (*SuiteResult, error)
MultiSessionWorkflow runs multi-session continuity tests.
func PolicyComplianceWorkflow ¶
func PolicyComplianceWorkflow(ctx workflow.Context, config TestConfig) (*SuiteResult, error)
PolicyComplianceWorkflow runs policy compliance tests.
type TestConfig ¶
TestConfig configures the test orchestrator.
type TestHarness ¶
type TestHarness struct {
// contains filtered or unexported fields
}
TestHarness provides test setup and teardown helpers.
func NewTestHarness ¶
func NewTestHarness(projectID string) (*TestHarness, error)
NewTestHarness creates a new test harness for isolated testing.
func (*TestHarness) Cleanup ¶
func (h *TestHarness) Cleanup(ctx context.Context) error
Cleanup stops all developers and closes the shared store. Call this in a defer statement after creating the harness.
func (*TestHarness) CreateDeveloper ¶
func (h *TestHarness) CreateDeveloper(id, tenantID string) (*Developer, error)
CreateDeveloper creates a new developer attached to the harness.
func (*TestHarness) SharedStore ¶
func (h *TestHarness) SharedStore() *SharedStore
SharedStore returns the underlying shared store.
type TestMetrics ¶
type TestMetrics struct {
// contains filtered or unexported fields
}
TestMetrics provides observability for integration tests.
func NewTestMetrics ¶
func NewTestMetrics() (*TestMetrics, error)
NewTestMetrics creates a new TestMetrics instance.
func (*TestMetrics) GetStats ¶
func (m *TestMetrics) GetStats() MetricStats
GetStats returns current metric statistics (for testing/debugging).
func (*TestMetrics) RecordCheckpointLoad ¶
func (m *TestMetrics) RecordCheckpointLoad(ctx context.Context, latency time.Duration, success bool)
RecordCheckpointLoad records a checkpoint load operation.
func (*TestMetrics) RecordCheckpointSave ¶
func (m *TestMetrics) RecordCheckpointSave(ctx context.Context, latency time.Duration)
RecordCheckpointSave records a checkpoint save operation.
func (*TestMetrics) RecordConfidenceScore ¶
RecordConfidenceScore records a confidence score observation.
func (*TestMetrics) RecordMemorySearch ¶
func (m *TestMetrics) RecordMemorySearch(ctx context.Context, latency time.Duration, hit bool, crossDev bool)
RecordMemorySearch records a memory search operation.
func (*TestMetrics) RecordSuiteDuration ¶
func (m *TestMetrics) RecordSuiteDuration(ctx context.Context, suiteName string, duration time.Duration)
RecordSuiteDuration records the duration of a test suite.
func (*TestMetrics) RecordTestFail ¶
func (m *TestMetrics) RecordTestFail(ctx context.Context, suite, testName string)
RecordTestFail records a failed test.
func (*TestMetrics) RecordTestPass ¶
func (m *TestMetrics) RecordTestPass(ctx context.Context, suite, testName string)
RecordTestPass records a passed test.
func (*TestMetrics) Reset ¶
func (m *TestMetrics) Reset()
Reset resets all counters (useful for testing).
func (*TestMetrics) StartPhaseSpan ¶
func (m *TestMetrics) StartPhaseSpan(ctx context.Context, phase string) (context.Context, trace.Span)
StartPhaseSpan starts a trace span for a test phase.
func (*TestMetrics) StartSuiteSpan ¶
func (m *TestMetrics) StartSuiteSpan(ctx context.Context, suiteName string) (context.Context, trace.Span)
StartSuiteSpan starts a trace span for a test suite.
func (*TestMetrics) StartTestSpan ¶
func (m *TestMetrics) StartTestSpan(ctx context.Context, testName string) (context.Context, trace.Span)
StartTestSpan starts a trace span for a single test.
type TestReport ¶
type TestReport struct {
Suites []SuiteResult
Errors []string
}
TestReport contains results from all test suites.
func TestOrchestratorWorkflow ¶
func TestOrchestratorWorkflow(ctx workflow.Context, config TestConfig) (*TestReport, error)
TestOrchestratorWorkflow coordinates all test suites.
type TestResult ¶
type TestResult struct {
TestName string
Passed bool
Message string
Duration int64 // milliseconds
}
TestResult contains results from a single test case.
type ThresholdAssertion ¶
type ThresholdAssertion struct {
Check string // What to check: "confidence", "result_count", "latency"
Method string // How to check: "first_result", "latest_search", etc.
Threshold float64 // Value to compare against
Operator string // Comparison operator: ">", "<", ">=", "<=", "=="
}
ThresholdAssertion is a comparison-based assertion.