harness

package

Version: v0.0.0-...-8acab51 (latest) | Published: Apr 26, 2026 | License: MIT | Imports: 38 | Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/IceWhaleTech/ZimaOS-Blue

Links

Open Source Insights

Documentation ¶

Index ¶

Constants
func ApplyHarnessContractToExpected(expected map[string]interface{}, contract HarnessContract) map[string]interface{}
func Batch1ExecutionCaseCount() int
func BuildHarnessCheckpointContext(raw map[string]interface{}) string
func BuildHarnessContractContext(contract HarnessContract) string
func DefaultDatasetBundleGitHubPathForRepo(owner, repo string) string
func DesktopChatSuccessRateCaseCount() int
func HarnessAdaptivePolicyMetadata(policy HarnessAdaptivePolicy) map[string]interface{}
func HarnessContractFallbackPlan(contract HarnessContract) []string
func HarnessContractMetadata(contract HarnessContract) map[string]interface{}
func HarnessContractSuccessCriteria(contract HarnessContract) []string
func LegacyStoreDBPath(dataDir string) string
func NewRuntimeEventObserverMux(observers ...tools.RuntimeEventObserver) tools.RuntimeEventObserver
func SelectorCuratedCaseCount() int
func WithRunContext(ctx context.Context, runCtx *RunContext) context.Context
type ActionDescriptor
type ActionDriver
type ActionInputDescriptor
type ActionInputFieldDescriptor
type AgentCompatHandler
- func NewAgentCompatHandler(manager *Controller, store *agentpkg.Store, runner *agentpkg.Runner, ...) *AgentCompatHandler
- func (h *AgentCompatHandler) CancelTask(c echo.Context) error
- func (h *AgentCompatHandler) CreateTask(c echo.Context) error
- func (h *AgentCompatHandler) DeleteTask(c echo.Context) error
- func (h *AgentCompatHandler) RegisterRoutes(g *echo.Group)
- func (h *AgentCompatHandler) SendMessage(c echo.Context) error
- func (h *AgentCompatHandler) SubmitAnswer(c echo.Context) error
type ApprovalMode
type ArtifactRef
type Baseline
type BaselineFilter
type BaselineSpec
type Batch1ExecutionAssets
type BatchTrajectoryCase
type BatchTrajectoryCaseResult
type BatchTrajectoryExecutor
type BatchTrajectoryRunResult
type BatchTrajectoryRunner
- func NewBatchTrajectoryRunner(cfg BatchTrajectoryRunnerConfig, executor BatchTrajectoryExecutor) *BatchTrajectoryRunner
- func (r *BatchTrajectoryRunner) Run(ctx context.Context, cases []BatchTrajectoryCase) (*BatchTrajectoryRunResult, error)
type BatchTrajectoryRunnerConfig
type CUABenchmarkCaseResult
- func CUAOSWorldMacBenchmarkCaseResultFromSummary(id string, summary map[string]interface{}) CUABenchmarkCaseResult
type CUABenchmarkReport
- func EvaluateCUABenchmarkReport(results []CUABenchmarkCaseResult) CUABenchmarkReport
type CUAOSWorldMacBenchmarkAssets
type CanonicalSkillSourceState
- func ResolveCanonicalSkillSourceState(skillID string, sourcePath string) (*CanonicalSkillSourceState, error)
- func ResolveWritableSkillSourceState(skillID string, sourcePath string) (*CanonicalSkillSourceState, error)
type CheckpointArtifact
type CompareEvalRunRequest
type ComparisonCaseDelta
type ComparisonReport
type Controller
- func NewController(store *SQLiteStore, resolver *PolicyResolver) *Controller
- func (c *Controller) AppendEvent(ctx context.Context, event RunEvent) error
- func (c *Controller) AttachArtifact(ctx context.Context, ref ArtifactRef) error
- func (c *Controller) BuildEvolutionOverview(ctx context.Context, skillID string, ownerUserID string) (*EvolutionOverview, error)
- func (c *Controller) BuildSkillEvolutionCaseDetail(ctx context.Context, evolutionCase *SkillEvolutionCase) (*SkillEvolutionCaseDetail, error)
- func (c *Controller) Cancel(ctx context.Context, id string, reason string) error
- func (c *Controller) CancelEvalRun(ctx context.Context, id string, reason string) error
- func (c *Controller) CancelGroup(ctx context.Context, id string, reason string) error
- func (c *Controller) CompareEvalRun(ctx context.Context, targetEvalRunID string, req CompareEvalRunRequest) (*ComparisonReport, error)
- func (c *Controller) CreateBaseline(ctx context.Context, spec BaselineSpec) (*Baseline, error)
- func (c *Controller) CreateDataset(ctx context.Context, spec DatasetSpec) (*Dataset, error)
- func (c *Controller) CreateDatasetVersion(ctx context.Context, datasetID string, spec DatasetVersionSpec) (*DatasetVersion, error)
- func (c *Controller) CreateEvalSpec(ctx context.Context, spec EvalSpecSpec) (*EvalSpec, error)
- func (c *Controller) CreateSkillEvolutionCase(ctx context.Context, evolutionCase SkillEvolutionCase) (*SkillEvolutionCase, error)
- func (c *Controller) CreateSkillRevision(ctx context.Context, revision SkillRevision) (*SkillRevision, error)
- func (c *Controller) Delete(ctx context.Context, id string) error
- func (c *Controller) EnsureBatch1ExecutionAssets(ctx context.Context, ownerUserID string) (*Batch1ExecutionAssets, error)
- func (c *Controller) EnsureDesktopChatSuccessRateAssets(ctx context.Context, ownerUserID string) (*DesktopChatSuccessRateAssets, error)
- func (c *Controller) EnsureSelectorCuratedAssets(ctx context.Context, ownerUserID string) (*SelectorCuratedAssets, error)
- func (c *Controller) EnsureSkillEvolutionCase(ctx context.Context, spec SkillEvolutionCaseSpec) (*SkillEvolutionCase, bool, error)
- func (c *Controller) EvaluateExecutionEquivalence(ctx context.Context, targetEvalRunID string, req ExecutionEquivalenceRequest) (*ExecutionEquivalenceReport, error)
- func (c *Controller) EvaluateSelectorGate(ctx context.Context, targetEvalRunID string, req SelectorGateRequest) (*SelectorGateReport, error)
- func (c *Controller) EvaluateSkillCutoverBudgetGate(ctx context.Context, targetEvalRunID string, req SkillCutoverBudgetRequest) (*SkillCutoverBudgetReport, error)
- func (c *Controller) EvaluateSkillCutoverReadiness(ctx context.Context, req SkillCutoverReadinessRequest) (*SkillCutoverReadinessReport, error)
- func (c *Controller) ExecutionMiddlewares() []ExecutionMiddleware
- func (c *Controller) FindRunByMetadata(ctx context.Context, kind RunKind, key string, value string) (*Run, error)
- func (c *Controller) Get(ctx context.Context, id string) (*Run, error)
- func (c *Controller) GetComparisonReport(ctx context.Context, id string) (*ComparisonReport, error)
- func (c *Controller) GetDataset(ctx context.Context, id string) (*Dataset, error)
- func (c *Controller) GetDatasetVersion(ctx context.Context, id string) (*DatasetVersion, error)
- func (c *Controller) GetEvalRun(ctx context.Context, id string) (*EvalRun, error)
- func (c *Controller) GetEvalRunReport(ctx context.Context, id string) (*EvalRunReport, error)
- func (c *Controller) GetEvalSpec(ctx context.Context, id string) (*EvalSpec, error)
- func (c *Controller) GetGroup(ctx context.Context, id string) (*RunGroup, error)
- func (c *Controller) GetGroupReport(ctx context.Context, id string) (*RunGroupReport, error)
- func (c *Controller) GetRegisteredDriver(kind RunKind) Driver
- func (c *Controller) GetSkillEvolutionCase(ctx context.Context, id string) (*SkillEvolutionCase, error)
- func (c *Controller) GetSkillEvolutionCaseDetail(ctx context.Context, id string) (*SkillEvolutionCaseDetail, error)
- func (c *Controller) GetSkillRevision(ctx context.Context, id string) (*SkillRevision, error)
- func (c *Controller) GetStored(ctx context.Context, id string) (*Run, error)
- func (c *Controller) ImportDatasetBundle(ctx context.Context, req ImportDatasetBundleRequest) (*ImportDatasetBundleResult, error)
- func (c *Controller) List(ctx context.Context, filter RunFilter) ([]Run, error)
- func (c *Controller) ListArtifacts(ctx context.Context, runID string) ([]ArtifactRef, error)
- func (c *Controller) ListBaselines(ctx context.Context, filter BaselineFilter) ([]Baseline, error)
- func (c *Controller) ListDatasetVersions(ctx context.Context, datasetID string, limit int) ([]DatasetVersion, error)
- func (c *Controller) ListDatasets(ctx context.Context, filter DatasetFilter) ([]Dataset, error)
- func (c *Controller) ListEvalRuns(ctx context.Context, filter EvalRunFilter) ([]EvalRun, error)
- func (c *Controller) ListEvalSpecs(ctx context.Context, filter EvalSpecFilter) ([]EvalSpec, error)
- func (c *Controller) ListEvents(ctx context.Context, runID string, limit int) ([]RunEvent, error)
- func (c *Controller) ListGroupItems(ctx context.Context, groupID string) ([]RunGroupItem, error)
- func (c *Controller) ListGroups(ctx context.Context, filter RunGroupFilter) ([]RunGroup, error)
- func (c *Controller) ListOne(ctx context.Context, id string) (*Run, error)
- func (c *Controller) ListSkillDecisionHistory(ctx context.Context, filter SkillDecisionHistoryFilter) ([]SkillDecisionHistoryEntry, error)
- func (c *Controller) ListSkillEvolutionCases(ctx context.Context, filter SkillEvolutionCaseFilter) ([]SkillEvolutionCase, error)
- func (c *Controller) ListSkillRevisions(ctx context.Context, filter SkillRevisionFilter) ([]SkillRevision, error)
- func (c *Controller) OptimizeSkill(ctx context.Context, skillID string, req SkillOptimizeRequest) (*OptimizationTrigger, error)
- func (c *Controller) PerformAction(ctx context.Context, id string, action string, input map[string]interface{}) (*Run, error)
- func (c *Controller) PerformGroupAction(ctx context.Context, id string, action string, input map[string]interface{}) (*RunGroup, error)
- func (c *Controller) PromoteGroup(ctx context.Context, groupID string, spec GroupPromotionSpec) (*GroupPromotionResult, error)
- func (c *Controller) PromoteSkillRevision(ctx context.Context, revisionID string, decision SkillRevisionDecisionRequest) (*SkillPromoteResult, error)
- func (c *Controller) RegisterDriver(driver Driver)
- func (c *Controller) RetryFailedGroup(ctx context.Context, id string) (int, error)
- func (c *Controller) RollbackSkillRevision(ctx context.Context, revisionID string, decision SkillRevisionDecisionRequest) (*SkillPromoteResult, error)
- func (c *Controller) RunTraceSnapshot(ctx context.Context, runID string) (*RunTrace, error)
- func (c *Controller) SetJudgeEvaluator(evaluator JudgeEvaluator)
- func (c *Controller) SetOptimizationTriggerer(triggerer OptimizationTriggerer)
- func (c *Controller) SetReflector(reflector ProposalReflector)
- func (c *Controller) SetRunTraceProvider(provider RunTraceProvider)
- func (c *Controller) SetRuntimeReflectionCoordinator(coordinator *RuntimeReflectionCoordinator)
- func (c *Controller) SpawnChild(ctx context.Context, parentID string, spec RunSpec) (*Run, error)
- func (c *Controller) Submit(ctx context.Context, spec RunSpec) (*Run, error)
- func (c *Controller) SubmitEvalRun(ctx context.Context, spec EvalRunSpec) (*EvalRun, error)
- func (c *Controller) SubmitGroup(ctx context.Context, spec RunGroupSpec) (*RunGroup, error)
- func (c *Controller) SyncSnapshot(ctx context.Context, snapshot *Run) error
- func (c *Controller) UpdateSkillEvolutionCase(ctx context.Context, evolutionCase SkillEvolutionCase) (*SkillEvolutionCase, error)
- func (c *Controller) UpdateSkillRevision(ctx context.Context, revision SkillRevision) (*SkillRevision, error)
- func (c *Controller) UseExecutionMiddleware(mw ExecutionMiddleware)
type Dataset
type DatasetBundleSourcePreview
type DatasetBundleVersionPreview
type DatasetFilter
type DatasetManifest
- func Batch1ExecutionDatasetManifest() DatasetManifest
- func CUAOSWorldMacBenchmarkManifest() DatasetManifest
- func DesktopChatSuccessRateDatasetManifest() DatasetManifest
- func SelectorCuratedDatasetManifest() DatasetManifest
type DatasetManifestDefaults
type DatasetManifestItem
type DatasetManifestMeta
type DatasetSpec
- func Batch1ExecutionDatasetSpec(ownerUserID string) DatasetSpec
- func CUAOSWorldMacBenchmarkDatasetSpec(ownerUserID string) DatasetSpec
- func DesktopChatSuccessRateDatasetSpec(ownerUserID string) DatasetSpec
- func SelectorCuratedDatasetSpec(ownerUserID string) DatasetSpec
type DatasetVersion
- func (v *DatasetVersion) DecodeManifest() (*DatasetManifest, error)
type DatasetVersionSpec
- func Batch1ExecutionDatasetVersionSpec(createdBy string) (DatasetVersionSpec, error)
- func CUAOSWorldMacBenchmarkDatasetVersionSpec(createdBy string) (DatasetVersionSpec, error)
- func DesktopChatSuccessRateDatasetVersionSpec(createdBy string) (DatasetVersionSpec, error)
- func SelectorCuratedDatasetVersionSpec(createdBy string) (DatasetVersionSpec, error)
type Defaults
type DesktopChatSuccessRateAssets
type DesktopChatSuccessRateGateCheck
type DesktopChatSuccessRateGateReport
- func EvaluateDesktopChatSuccessRateGate(summary map[string]interface{}, existingFocusedRegressionPassed bool) DesktopChatSuccessRateGateReport
type Driver
type EvalRun
type EvalRunFilter
type EvalRunReport
type EvalRunSpec
type EvalSpec
type EvalSpecFilter
type EvalSpecSpec
- func Batch1ExecutionEvalSpecSpec(datasetID, datasetVersionID, ownerUserID string) EvalSpecSpec
- func CUAOSWorldMacBenchmarkEvalSpecSpec(datasetID, datasetVersionID, ownerUserID string) EvalSpecSpec
- func DesktopChatSuccessRateEvalSpecSpec(datasetID, datasetVersionID, ownerUserID string) EvalSpecSpec
- func SelectorCuratedEvalSpecSpec(datasetID, datasetVersionID, ownerUserID string) EvalSpecSpec
type EvolutionOverview
type EvolutionOverviewInstructionCounts
type EvolutionOverviewRevisionCounts
type EvolutionProposalSummaryProvider
type ExecPathGuard
- func NewExecPathGuard(manager *Controller) *ExecPathGuard
- func (g *ExecPathGuard) CheckExecPath(ctx context.Context, absPath string) error
- func (g *ExecPathGuard) CheckExecWorkdir(ctx context.Context, absWorkdir string) error
type ExecutionEquivalenceCheck
type ExecutionEquivalenceMetrics
type ExecutionEquivalenceReport
type ExecutionEquivalenceRequest
type ExecutionEquivalenceSegmentMetrics
type ExecutionEquivalenceThresholds
- func DefaultExecutionEquivalenceThresholds() ExecutionEquivalenceThresholds
type ExecutionMiddleware
- func NewSkillCandidateMiddleware() ExecutionMiddleware
type ExecutionMiddlewareHooks
- func (h ExecutionMiddlewareHooks) AfterStart(ctx context.Context, runCtx *RunContext)
- func (h ExecutionMiddlewareHooks) BeforeStart(ctx context.Context, runCtx *RunContext) error
- func (h ExecutionMiddlewareHooks) OnStartError(ctx context.Context, runCtx *RunContext, runErr error)
type GroupDispatcher
- func NewGroupDispatcher(manager *Controller) *GroupDispatcher
- func (d *GroupDispatcher) DispatchOnce(ctx context.Context) error
- func (d *GroupDispatcher) SetPollInterval(interval time.Duration)
- func (d *GroupDispatcher) SetRunPollInterval(interval time.Duration)
- func (d *GroupDispatcher) Start(ctx context.Context)
type GroupPromotionResult
type GroupPromotionSpec
type GroupSchedulerConfig
type GroupScoringConfig
type GuardPipelineError
- func (e *GuardPipelineError) Error() string
- func (e *GuardPipelineError) ToolRuntimeCode() string
- func (e *GuardPipelineError) ToolRuntimeDetails() map[string]interface{}
- func (e *GuardPipelineError) Unwrap() error
type Handler
- func NewHandler(manager *Controller) *Handler
- func (h *Handler) CancelEvalRun(c echo.Context) error
- func (h *Handler) CancelGroup(c echo.Context) error
- func (h *Handler) CancelRun(c echo.Context) error
- func (h *Handler) CompareEvalRun(c echo.Context) error
- func (h *Handler) CreateBaseline(c echo.Context) error
- func (h *Handler) CreateDataset(c echo.Context) error
- func (h *Handler) CreateDatasetVersion(c echo.Context) error
- func (h *Handler) CreateEvalRun(c echo.Context) error
- func (h *Handler) CreateEvalSpec(c echo.Context) error
- func (h *Handler) CreateGroup(c echo.Context) error
- func (h *Handler) CreateRun(c echo.Context) error
- func (h *Handler) EnsureBatch1ExecutionAssets(c echo.Context) error
- func (h *Handler) EnsureSelectorCuratedAssets(c echo.Context) error
- func (h *Handler) EvaluateExecutionEquivalence(c echo.Context) error
- func (h *Handler) EvaluateSelectorGate(c echo.Context) error
- func (h *Handler) EvaluateSkillCutoverBudgetGate(c echo.Context) error
- func (h *Handler) EvaluateSkillCutoverReadiness(c echo.Context) error
- func (h *Handler) GetComparisonReport(c echo.Context) error
- func (h *Handler) GetDataset(c echo.Context) error
- func (h *Handler) GetDatasetVersion(c echo.Context) error
- func (h *Handler) GetEvalRun(c echo.Context) error
- func (h *Handler) GetEvalRunReport(c echo.Context) error
- func (h *Handler) GetEvalSpec(c echo.Context) error
- func (h *Handler) GetEvolutionOverview(c echo.Context) error
- func (h *Handler) GetGroup(c echo.Context) error
- func (h *Handler) GetGroupReport(c echo.Context) error
- func (h *Handler) GetRun(c echo.Context) error
- func (h *Handler) GetRunDetail(c echo.Context) error
- func (h *Handler) GetSkillEvolutionCase(c echo.Context) error
- func (h *Handler) GetSkillRevision(c echo.Context) error
- func (h *Handler) ImportDatasetBundle(c echo.Context) error
- func (h *Handler) ImportDatasetBundleFromSource(c echo.Context) error
- func (h *Handler) ListArtifacts(c echo.Context) error
- func (h *Handler) ListBaselines(c echo.Context) error
- func (h *Handler) ListDatasetVersions(c echo.Context) error
- func (h *Handler) ListDatasets(c echo.Context) error
- func (h *Handler) ListEvalRuns(c echo.Context) error
- func (h *Handler) ListEvalSpecs(c echo.Context) error
- func (h *Handler) ListEvents(c echo.Context) error
- func (h *Handler) ListGroupItems(c echo.Context) error
- func (h *Handler) ListGroups(c echo.Context) error
- func (h *Handler) ListRuns(c echo.Context) error
- func (h *Handler) ListSkillDecisionHistory(c echo.Context) error
- func (h *Handler) ListSkillEvolutionCases(c echo.Context) error
- func (h *Handler) ListSkillRevisions(c echo.Context) error
- func (h *Handler) OptimizeSkill(c echo.Context) error
- func (h *Handler) PerformGroupAction(c echo.Context) error
- func (h *Handler) PerformRunAction(c echo.Context) error
- func (h *Handler) PreviewDatasetBundleFromSource(c echo.Context) error
- func (h *Handler) PromoteGroup(c echo.Context) error
- func (h *Handler) PromoteSkillRevision(c echo.Context) error
- func (h *Handler) RegisterRoutes(g *echo.Group)
- func (h *Handler) RetryFailedGroup(c echo.Context) error
- func (h *Handler) RollbackSkillRevision(c echo.Context) error
- func (h *Handler) SetDetailProvider(provider RunDetailProvider)
- func (h *Handler) SetEvolutionProposalSummaryProvider(provider EvolutionProposalSummaryProvider)
type HarnessAPICheck
type HarnessAdaptivePolicy
- func DeriveHarnessAdaptivePolicy(model string, kind RunKind, contract HarnessContract) HarnessAdaptivePolicy
type HarnessBrowserCheck
type HarnessCheckpoint
type HarnessContract
- func DecodeHarnessContract(sources ...map[string]interface{}) HarnessContract
type HarnessExpectedArtifact
type HarnessSubagentExecutor
- func NewSubagentExecutor(manager *Controller, agents *config.AgentsConfig) *HarnessSubagentExecutor
- func (e *HarnessSubagentExecutor) ExecuteIsolated(ctx context.Context, req tools.SubagentRequest) (*tools.SubagentResult, error)
- func (e *HarnessSubagentExecutor) ExecuteSubagent(ctx context.Context, req tools.SubagentRequest) (*tools.SubagentResult, error)
type HarnessVerificationResult
type ImportDatasetBundleEvalSpec
type ImportDatasetBundleFromSourceRequest
type ImportDatasetBundleRequest
type ImportDatasetBundleResult
type JudgeEvaluationRequest
type JudgeEvaluationResult
type JudgeEvaluator
type LLMJudgeEvaluator
- func NewLLMJudgeEvaluator(llmCaller judgeLLMCaller) *LLMJudgeEvaluator
- func (e *LLMJudgeEvaluator) Evaluate(ctx context.Context, req JudgeEvaluationRequest) (*JudgeEvaluationResult, error)
type LegacyStoreMigrationResult
- func MigrateLegacyStore(ctx context.Context, db *sql.DB, dataDir string) (*LegacyStoreMigrationResult, error)
type Manager
type OptimizationReason
type OptimizationSurface
type OptimizationTrigger
type OptimizationTriggerer
type PolicyResolver
- func NewPolicyResolver(cfg config.HarnessConfig, agents *config.AgentsConfig) *PolicyResolver
- func (r *PolicyResolver) ArtifactRoot(runID string) string
- func (r *PolicyResolver) IsProtectedPath(path string, artifactRoot string) bool
- func (r *PolicyResolver) Resolve(spec RunSpec) RunSpec
- func (r *PolicyResolver) ResolveChild(parent *Run, spec RunSpec) (RunSpec, error)
type ProposalReflector
type Run
type RunActionAvailability
type RunContext
- func GetRunContext(ctx context.Context) *RunContext
type RunDetail
type RunDetailProvider
type RunEnv
type RunEvent
type RunFilter
type RunGroup
type RunGroupFilter
type RunGroupItem
type RunGroupItemSpec
type RunGroupItemStatus
type RunGroupKind
type RunGroupReport
type RunGroupSpec
type RunGroupStatus
type RunKind
type RunSpec
type RunStatus
type RunTrace
type RunTraceCollector
- func NewRunTraceCollector(manager *Controller) *RunTraceCollector
- func (c *RunTraceCollector) Middleware() ExecutionMiddleware
- func (c *RunTraceCollector) Snapshot(ctx context.Context, runID string) (*RunTrace, error)
type RunTraceEvent
type RunTraceProvider
type RunTraceStage
type RuntimeEvidenceEntry
type RuntimeEvidenceProvider
type RuntimeObserver
- func NewRuntimeObserver(manager *Controller) *RuntimeObserver
- func (o *RuntimeObserver) OnApprovalRequested(event tools.ApprovalRuntimeEvent)
- func (o *RuntimeObserver) OnApprovalResolved(event tools.ApprovalRuntimeEvent)
- func (o *RuntimeObserver) OnQuestionRequested(event tools.QuestionRuntimeEvent)
- func (o *RuntimeObserver) OnQuestionResolved(event tools.QuestionRuntimeEvent)
- func (o *RuntimeObserver) OnToolFinished(event tools.ToolRuntimeEvent)
- func (o *RuntimeObserver) OnToolRequested(event tools.ToolRuntimeEvent)
type RuntimeReflectionCoordinator
- func NewRuntimeReflectionCoordinator(controller *Controller, cfg config.RuntimeReflectionConfig) *RuntimeReflectionCoordinator
- func (c *RuntimeReflectionCoordinator) OnApprovalRequested(event tools.ApprovalRuntimeEvent)
- func (c *RuntimeReflectionCoordinator) OnApprovalResolved(event tools.ApprovalRuntimeEvent)
- func (c *RuntimeReflectionCoordinator) OnQuestionRequested(event tools.QuestionRuntimeEvent)
- func (c *RuntimeReflectionCoordinator) OnQuestionResolved(event tools.QuestionRuntimeEvent)
- func (c *RuntimeReflectionCoordinator) OnRunTerminal(ctx context.Context, run *Run)
- func (c *RuntimeReflectionCoordinator) OnToolFinished(event tools.ToolRuntimeEvent)
- func (c *RuntimeReflectionCoordinator) OnToolRequested(event tools.ToolRuntimeEvent)
type RuntimeStage
type SQLiteStore
- func NewSQLiteStore(db *sql.DB) (*SQLiteStore, error)
- func NewSQLiteStoreWithReadDB(writeDB, readDB *sql.DB) (*SQLiteStore, error)
- func (s *SQLiteStore) AppendEvent(ctx context.Context, event RunEvent) error
- func (s *SQLiteStore) AttachArtifact(ctx context.Context, ref ArtifactRef) error
- func (s *SQLiteStore) AttachScorecard(ctx context.Context, scorecard Scorecard) error
- func (s *SQLiteStore) BuildEvolutionOverview(ctx context.Context, skillID string, _ string) (*EvolutionOverview, error)
- func (s *SQLiteStore) ClaimNextGroupItem(ctx context.Context, groupID, workerID string, leaseTTL time.Duration, ...) (*RunGroupItem, error)
- func (s *SQLiteStore) ClearDefaultBaseline(ctx context.Context, evalSpecID string) error
- func (s *SQLiteStore) CountGroupItemsByStatuses(ctx context.Context, groupID string, statuses []RunGroupItemStatus) (int, error)
- func (s *SQLiteStore) CreateBaseline(ctx context.Context, baseline *Baseline) error
- func (s *SQLiteStore) CreateComparisonReport(ctx context.Context, report *ComparisonReport) error
- func (s *SQLiteStore) CreateDataset(ctx context.Context, dataset *Dataset) error
- func (s *SQLiteStore) CreateDatasetVersion(ctx context.Context, version *DatasetVersion) error
- func (s *SQLiteStore) CreateEvalRun(ctx context.Context, evalRun *EvalRun) error
- func (s *SQLiteStore) CreateEvalSpec(ctx context.Context, spec *EvalSpec) error
- func (s *SQLiteStore) CreateGroup(ctx context.Context, group *RunGroup) error
- func (s *SQLiteStore) CreateGroupItems(ctx context.Context, items []RunGroupItem) error
- func (s *SQLiteStore) CreateRun(ctx context.Context, run *Run) error
- func (s *SQLiteStore) CreateSkillEvolutionCase(ctx context.Context, evolutionCase *SkillEvolutionCase) error
- func (s *SQLiteStore) CreateSkillRevision(ctx context.Context, revision *SkillRevision) error
- func (s *SQLiteStore) DeleteRun(ctx context.Context, id string) error
- func (s *SQLiteStore) FindDatasetByOwnerAndName(ctx context.Context, ownerUserID, name string) (*Dataset, error)
- func (s *SQLiteStore) FindDatasetVersionByDatasetAndVersion(ctx context.Context, datasetID, version string) (*DatasetVersion, error)
- func (s *SQLiteStore) FindEvalSpecByOwnerDatasetAndName(ctx context.Context, ownerUserID, datasetID, name string) (*EvalSpec, error)
- func (s *SQLiteStore) FindLatestSkillEvolutionCaseByDedupKey(ctx context.Context, skillID string, ownerUserID string, dedupKey string) (*SkillEvolutionCase, error)
- func (s *SQLiteStore) FindRunByMetadata(ctx context.Context, kind RunKind, key, value string) (*Run, error)
- func (s *SQLiteStore) GetBaseline(ctx context.Context, id string) (*Baseline, error)
- func (s *SQLiteStore) GetComparisonReport(ctx context.Context, id string) (*ComparisonReport, error)
- func (s *SQLiteStore) GetDataset(ctx context.Context, id string) (*Dataset, error)
- func (s *SQLiteStore) GetDatasetVersion(ctx context.Context, id string) (*DatasetVersion, error)
- func (s *SQLiteStore) GetEvalRun(ctx context.Context, id string) (*EvalRun, error)
- func (s *SQLiteStore) GetEvalSpec(ctx context.Context, id string) (*EvalSpec, error)
- func (s *SQLiteStore) GetGroup(ctx context.Context, id string) (*RunGroup, error)
- func (s *SQLiteStore) GetGroupItem(ctx context.Context, id string) (*RunGroupItem, error)
- func (s *SQLiteStore) GetRun(ctx context.Context, id string) (*Run, error)
- func (s *SQLiteStore) GetSkillEvolutionCase(ctx context.Context, id string) (*SkillEvolutionCase, error)
- func (s *SQLiteStore) GetSkillRevision(ctx context.Context, id string) (*SkillRevision, error)
- func (s *SQLiteStore) LatestScorecardForItem(ctx context.Context, groupItemID string) (*Scorecard, error)
- func (s *SQLiteStore) ListArtifacts(ctx context.Context, runID string) ([]ArtifactRef, error)
- func (s *SQLiteStore) ListBaselines(ctx context.Context, filter BaselineFilter) ([]Baseline, error)
- func (s *SQLiteStore) ListDatasetVersions(ctx context.Context, datasetID string, limit int) ([]DatasetVersion, error)
- func (s *SQLiteStore) ListDatasets(ctx context.Context, filter DatasetFilter) ([]Dataset, error)
- func (s *SQLiteStore) ListEvalRuns(ctx context.Context, filter EvalRunFilter) ([]EvalRun, error)
- func (s *SQLiteStore) ListEvalSpecs(ctx context.Context, filter EvalSpecFilter) ([]EvalSpec, error)
- func (s *SQLiteStore) ListEvents(ctx context.Context, runID string, limit int) ([]RunEvent, error)
- func (s *SQLiteStore) ListGroupItems(ctx context.Context, groupID string) ([]RunGroupItem, error)
- func (s *SQLiteStore) ListGroups(ctx context.Context, filter RunGroupFilter) ([]RunGroup, error)
- func (s *SQLiteStore) ListRuns(ctx context.Context, filter RunFilter) ([]Run, error)
- func (s *SQLiteStore) ListScorecards(ctx context.Context, groupID string) ([]Scorecard, error)
- func (s *SQLiteStore) ListSkillDecisionHistory(ctx context.Context, filter SkillDecisionHistoryFilter) ([]SkillDecisionHistoryEntry, error)
- func (s *SQLiteStore) ListSkillEvolutionCases(ctx context.Context, filter SkillEvolutionCaseFilter) ([]SkillEvolutionCase, error)
- func (s *SQLiteStore) ListSkillRevisions(ctx context.Context, filter SkillRevisionFilter) ([]SkillRevision, error)
- func (s *SQLiteStore) UpdateDataset(ctx context.Context, dataset *Dataset) error
- func (s *SQLiteStore) UpdateEvalRun(ctx context.Context, evalRun *EvalRun) error
- func (s *SQLiteStore) UpdateEvalSpec(ctx context.Context, spec *EvalSpec) error
- func (s *SQLiteStore) UpdateGroup(ctx context.Context, group *RunGroup) error
- func (s *SQLiteStore) UpdateGroupItem(ctx context.Context, item *RunGroupItem) error
- func (s *SQLiteStore) UpdateRun(ctx context.Context, run *Run) error
- func (s *SQLiteStore) UpdateRunIfMaterialStateMatches(ctx context.Context, expected *Run, next *Run) (bool, error)
- func (s *SQLiteStore) UpdateSkillEvolutionCase(ctx context.Context, evolutionCase *SkillEvolutionCase) error
- func (s *SQLiteStore) UpdateSkillRevision(ctx context.Context, revision *SkillRevision) error
type ScoreVerdict
type Scorecard
type ScoringMode
type SelectorCuratedAssets
type SelectorGateCheck
type SelectorGateMetrics
type SelectorGateReport
type SelectorGateRequest
type SelectorGateSegmentMetrics
type SelectorGateThresholds
- func DefaultSelectorGateThresholds() SelectorGateThresholds
type SkillCutoverBudgetCheck
type SkillCutoverBudgetMetrics
type SkillCutoverBudgetReport
type SkillCutoverBudgetRequest
type SkillCutoverBudgetThresholds
type SkillCutoverLaneAssessment
type SkillCutoverLaneReadiness
type SkillCutoverReadinessReport
type SkillCutoverReadinessRequest
type SkillDecisionHistoryEntry
type SkillDecisionHistoryFilter
type SkillEvolutionCase
type SkillEvolutionCaseDetail
type SkillEvolutionCaseFilter
type SkillEvolutionCaseSpec
type SkillEvolutionCaseStatus
type SkillEvolutionMode
type SkillEvolutionReason
type SkillOptimizeRequest
type SkillPromoteResult
type SkillRevision
type SkillRevisionDecisionAction
type SkillRevisionDecisionRequest
type SkillRevisionFilter
type SkillRevisionPromotionRecorder
type SkillRevisionStatus
type SnapshotDriver
type Store
type UserTaskActions
type UserTaskArtifact
type UserTaskBlocker
type UserTaskProjection
type UserTaskProjectionFilter
type UserTaskProjectionHandler
- func NewUserTaskProjectionHandler(manager *Controller, detailProvider RunDetailProvider) *UserTaskProjectionHandler
- func (h *UserTaskProjectionHandler) CancelTask(c echo.Context) error
- func (h *UserTaskProjectionHandler) GetTask(c echo.Context) error
- func (h *UserTaskProjectionHandler) PerformTaskAction(c echo.Context) error
- func (h *UserTaskProjectionHandler) RegisterRoutes(g *echo.Group)
- func (h *UserTaskProjectionHandler) ResumeTask(c echo.Context) error
type UserTaskProjectionService
- func NewUserTaskProjectionService(manager *Controller, detailProvider RunDetailProvider) *UserTaskProjectionService
- func (s *UserTaskProjectionService) Get(ctx context.Context, runID string, scope string) (*UserTaskProjection, error)
- func (s *UserTaskProjectionService) List(ctx context.Context, filter UserTaskProjectionFilter) ([]UserTaskProjection, error)
type UserTaskResearchSource
type UserTaskSubagentSummary
type WritePathGuard
- func NewWritePathGuard(manager *Controller) *WritePathGuard
- func (g *WritePathGuard) CheckWritePath(ctx context.Context, absPath string) error

Constants ¶

View Source

const (
	CUAOSWorldMacBenchmarkDatasetName        = "computer-use-cua-osworld-macos"
	CUAOSWorldMacBenchmarkDatasetDescription = "OSWorld-style macOS computer-use benchmark profile for Go-native CUA mode."
	CUAOSWorldMacBenchmarkDatasetSubject     = "computer_use_cua_osworld_macos"
	CUAOSWorldMacBenchmarkDatasetVersion     = "computer-use-cua-osworld-macos-v1"
	CUAOSWorldMacBenchmarkEvalName           = "Computer-Use Go-Native CUA OSWorld-Style macOS Benchmark"
	CUAOSWorldMacBenchmarkProfile            = "computer_use_cua_osworld_macos"
)

View Source

const (
	DesktopChatSuccessRateDatasetName        = "computer-use-desktop-chat-macos"
	DesktopChatSuccessRateDatasetDescription = "Curated macOS desktop-chat success-rate dataset for computer_use task trajectories."
	DesktopChatSuccessRateDatasetSubject     = "computer_use_desktop_chat"
	DesktopChatSuccessRateDatasetVersion     = "computer-use-desktop-chat-macos-v1"
	DesktopChatSuccessRateEvalName           = "Computer-Use macOS Desktop Chat Success Rate"
)

View Source

const (
	Batch1ExecutionDatasetName        = "skill-exec-batch1"
	Batch1ExecutionDatasetDescription = "Curated batch-1 execution equivalence dataset for tool-to-skill migration."
	Batch1ExecutionDatasetSubject     = "skill_execution_batch1"
	Batch1ExecutionDatasetVersion     = "skill-exec-batch1-v6"
	Batch1ExecutionEvalName           = "Skill Execution Batch 1"
)

View Source

const (
	SelectorCuratedDatasetName        = "selector-curated"
	SelectorCuratedDatasetDescription = "Curated selector dry-run routing regression dataset."
	SelectorCuratedDatasetSubject     = "selector_dry_run"
	SelectorCuratedDatasetVersion     = "selector-curated-v4"
	SelectorCuratedEvalName           = "Selector Curated Dry Run"
)

View Source

const LegacyStoreDBFilename = "harness.db"

Variables ¶

This section is empty.

Functions ¶

func ApplyHarnessContractToExpected ¶

func ApplyHarnessContractToExpected(expected map[string]interface{}, contract HarnessContract) map[string]interface{}

func Batch1ExecutionCaseCount ¶

func Batch1ExecutionCaseCount() int

func BuildHarnessCheckpointContext ¶

func BuildHarnessCheckpointContext(raw map[string]interface{}) string

func BuildHarnessContractContext ¶

func BuildHarnessContractContext(contract HarnessContract) string

func DefaultDatasetBundleGitHubPathForRepo ¶

func DefaultDatasetBundleGitHubPathForRepo(owner, repo string) string

func DesktopChatSuccessRateCaseCount ¶

func DesktopChatSuccessRateCaseCount() int

func HarnessAdaptivePolicyMetadata ¶

func HarnessAdaptivePolicyMetadata(policy HarnessAdaptivePolicy) map[string]interface{}

func HarnessContractFallbackPlan ¶

func HarnessContractFallbackPlan(contract HarnessContract) []string

func HarnessContractMetadata ¶

func HarnessContractMetadata(contract HarnessContract) map[string]interface{}

func HarnessContractSuccessCriteria ¶

func HarnessContractSuccessCriteria(contract HarnessContract) []string

func LegacyStoreDBPath ¶

func LegacyStoreDBPath(dataDir string) string

LegacyStoreDBPath returns the path of the legacy standalone harness database file for the given data directory.

func NewRuntimeEventObserverMux ¶

func NewRuntimeEventObserverMux(observers ...tools.RuntimeEventObserver) tools.RuntimeEventObserver

func SelectorCuratedCaseCount ¶

func SelectorCuratedCaseCount() int

SelectorCuratedCaseCount returns the stable case count for the built-in selector routing dataset.

func WithRunContext ¶

func WithRunContext(ctx context.Context, runCtx *RunContext) context.Context

WithRunContext annotates a context with the current run-scoped execution envelope.

Types ¶

type ActionDescriptor ¶

type ActionDescriptor struct {
	ID            string                 `json:"id"`
	Label         string                 `json:"label"`
	Method        string                 `json:"method"`
	Path          string                 `json:"path"`
	Variant       string                 `json:"variant,omitempty"`
	RequiresInput bool                   `json:"requires_input,omitempty"`
	Input         *ActionInputDescriptor `json:"input,omitempty"`
}

type ActionDriver ¶

type ActionDriver interface {
	PerformAction(ctx context.Context, run *Run, action string, input map[string]interface{}) (*Run, error)
}

type ActionInputDescriptor ¶

type ActionInputDescriptor struct {
	Title       string                       `json:"title,omitempty"`
	Description string                       `json:"description,omitempty"`
	SubmitLabel string                       `json:"submit_label,omitempty"`
	Fields      []ActionInputFieldDescriptor `json:"fields,omitempty"`
}

type ActionInputFieldDescriptor ¶

type ActionInputFieldDescriptor struct {
	Key         string   `json:"key"`
	Label       string   `json:"label"`
	Kind        string   `json:"kind,omitempty"`
	Target      string   `json:"target,omitempty"`
	PayloadKey  string   `json:"payload_key,omitempty"`
	Required    bool     `json:"required,omitempty"`
	Placeholder string   `json:"placeholder,omitempty"`
	Options     []string `json:"options,omitempty"`
}

type AgentCompatHandler ¶

type AgentCompatHandler struct {
	// contains filtered or unexported fields
}

func NewAgentCompatHandler ¶

func NewAgentCompatHandler(manager *Controller, store *agentpkg.Store, runner *agentpkg.Runner, defaultWorkspaceRoot string) *AgentCompatHandler

func (*AgentCompatHandler) CancelTask ¶

func (h *AgentCompatHandler) CancelTask(c echo.Context) error

func (*AgentCompatHandler) CreateTask ¶

func (h *AgentCompatHandler) CreateTask(c echo.Context) error

func (*AgentCompatHandler) DeleteTask ¶

func (h *AgentCompatHandler) DeleteTask(c echo.Context) error

func (*AgentCompatHandler) RegisterRoutes ¶

func (h *AgentCompatHandler) RegisterRoutes(g *echo.Group)

func (*AgentCompatHandler) SendMessage ¶

func (h *AgentCompatHandler) SendMessage(c echo.Context) error

func (*AgentCompatHandler) SubmitAnswer ¶

func (h *AgentCompatHandler) SubmitAnswer(c echo.Context) error

type ApprovalMode ¶

type ApprovalMode string

const (
	ApprovalModeAsk   ApprovalMode = "ask"
	ApprovalModeAllow ApprovalMode = "allow"
	ApprovalModeDeny  ApprovalMode = "deny"
)

type ArtifactRef ¶

type ArtifactRef struct {
	ID           string `json:"id"`
	RunID        string `json:"run_id"`
	Kind         string `json:"kind"`
	Label        string `json:"label,omitempty"`
	PathOrURL    string `json:"path_or_url,omitempty"`
	MIMEType     string `json:"mime_type,omitempty"`
	SizeBytes    int64  `json:"size_bytes,omitempty"`
	MetadataJSON string `json:"metadata_json,omitempty"`
}

type Baseline ¶

type Baseline struct {
	ID          string                 `json:"id"`
	Name        string                 `json:"name"`
	Subject     string                 `json:"subject,omitempty"`
	OwnerUserID string                 `json:"owner_user_id,omitempty"`
	EvalSpecID  string                 `json:"eval_spec_id"`
	EvalRunID   string                 `json:"eval_run_id"`
	IsDefault   bool                   `json:"is_default"`
	Metadata    map[string]interface{} `json:"metadata,omitempty"`
	CreatedAt   time.Time              `json:"created_at"`
	UpdatedAt   time.Time              `json:"updated_at"`
}

type BaselineFilter ¶

type BaselineFilter struct {
	OwnerUserID string
	EvalSpecID  string
	Limit       int
}

type BaselineSpec ¶

type BaselineSpec struct {
	Name        string                 `json:"name"`
	Subject     string                 `json:"subject,omitempty"`
	OwnerUserID string                 `json:"owner_user_id,omitempty"`
	EvalSpecID  string                 `json:"eval_spec_id,omitempty"`
	EvalRunID   string                 `json:"eval_run_id"`
	IsDefault   bool                   `json:"is_default,omitempty"`
	Metadata    map[string]interface{} `json:"metadata,omitempty"`
}

type Batch1ExecutionAssets ¶

type Batch1ExecutionAssets struct {
	Dataset        *Dataset        `json:"dataset,omitempty"`
	DatasetVersion *DatasetVersion `json:"dataset_version,omitempty"`
	EvalSpec       *EvalSpec       `json:"eval_spec,omitempty"`
}

type BatchTrajectoryCase ¶

type BatchTrajectoryCase struct {
	Key      string                 `json:"key,omitempty"`
	Prompt   string                 `json:"prompt,omitempty"`
	Input    map[string]interface{} `json:"input,omitempty"`
	Metadata map[string]interface{} `json:"metadata,omitempty"`
}

type BatchTrajectoryCaseResult ¶

type BatchTrajectoryCaseResult struct {
	Input        map[string]interface{} `json:"input,omitempty"`
	Expected     map[string]interface{} `json:"expected,omitempty"`
	Metadata     map[string]interface{} `json:"metadata,omitempty"`
	TraceSummary string                 `json:"trace_summary,omitempty"`
	Provenance   map[string]interface{} `json:"provenance,omitempty"`
}

type BatchTrajectoryExecutor ¶

type BatchTrajectoryExecutor interface {
	ExecuteBatchTrajectoryCase(ctx context.Context, c BatchTrajectoryCase) (*BatchTrajectoryCaseResult, error)
}

type BatchTrajectoryRunResult ¶

type BatchTrajectoryRunResult struct {
	Manifest           DatasetManifest    `json:"manifest"`
	DatasetVersionSpec DatasetVersionSpec `json:"dataset_version_spec"`
	CompletedCaseKeys  []string           `json:"completed_case_keys,omitempty"`
	FailedCaseKeys     []string           `json:"failed_case_keys,omitempty"`
	CheckpointPath     string             `json:"checkpoint_path,omitempty"`
}

type BatchTrajectoryRunner ¶

type BatchTrajectoryRunner struct {
	// contains filtered or unexported fields
}

func NewBatchTrajectoryRunner ¶

func NewBatchTrajectoryRunner(cfg BatchTrajectoryRunnerConfig, executor BatchTrajectoryExecutor) *BatchTrajectoryRunner

func (*BatchTrajectoryRunner) Run ¶

func (r *BatchTrajectoryRunner) Run(ctx context.Context, cases []BatchTrajectoryCase) (*BatchTrajectoryRunResult, error)

type BatchTrajectoryRunnerConfig ¶

type BatchTrajectoryRunnerConfig struct {
	DatasetName    string                 `json:"dataset_name"`
	DatasetSubject string                 `json:"dataset_subject"`
	DatasetVersion string                 `json:"dataset_version"`
	SourceType     string                 `json:"source_type,omitempty"`
	SourceRef      string                 `json:"source_ref,omitempty"`
	CreatedBy      string                 `json:"created_by,omitempty"`
	CandidateID    string                 `json:"candidate_id,omitempty"`
	CheckpointPath string                 `json:"checkpoint_path"`
	DefaultRunKind RunKind                `json:"default_run_kind,omitempty"`
	DefaultProfile string                 `json:"default_profile,omitempty"`
	Scheduler      GroupSchedulerConfig   `json:"scheduler,omitempty"`
	Scoring        GroupScoringConfig     `json:"scoring,omitempty"`
	RuntimePolicy  map[string]interface{} `json:"runtime_policy,omitempty"`
	MaxRetries     int                    `json:"max_retries,omitempty"`
}

type CUABenchmarkCaseResult ¶

type CUABenchmarkCaseResult struct {
	ID                 string `json:"id,omitempty"`
	Passed             bool   `json:"passed"`
	Critical           bool   `json:"critical,omitempty"`
	VerificationBacked bool   `json:"verification_backed,omitempty"`
	FailureLabel       string `json:"failure_label,omitempty"`
	LatencyMS          int    `json:"latency_ms,omitempty"`
}

func CUAOSWorldMacBenchmarkCaseResultFromSummary ¶

func CUAOSWorldMacBenchmarkCaseResultFromSummary(id string, summary map[string]interface{}) CUABenchmarkCaseResult

type CUABenchmarkReport ¶

type CUABenchmarkReport struct {
	CaseCount                     int            `json:"case_count"`
	PassedCount                   int            `json:"passed_count"`
	CriticalCount                 int            `json:"critical_count"`
	CriticalPassedCount           int            `json:"critical_passed_count"`
	PassRate                      float64        `json:"pass_rate"`
	CriticalPassRate              float64        `json:"critical_pass_rate"`
	UnsafeSendCount               int            `json:"unsafe_send_count"`
	TypedBodyIntoSearchFieldCount int            `json:"typed_body_into_search_field_count"`
	VerificationBackedRate        float64        `json:"verification_backed_rate"`
	LatencyP50MS                  int            `json:"latency_p50_ms"`
	LatencyP90MS                  int            `json:"latency_p90_ms"`
	FailureLabelCounts            map[string]int `json:"failure_label_counts,omitempty"`
}

func EvaluateCUABenchmarkReport ¶

func EvaluateCUABenchmarkReport(results []CUABenchmarkCaseResult) CUABenchmarkReport

type CUAOSWorldMacBenchmarkAssets ¶

type CUAOSWorldMacBenchmarkAssets struct {
	Dataset        *Dataset        `json:"dataset,omitempty"`
	DatasetVersion *DatasetVersion `json:"dataset_version,omitempty"`
	EvalSpec       *EvalSpec       `json:"eval_spec,omitempty"`
}

type CanonicalSkillSourceState ¶

type CanonicalSkillSourceState struct {
	AbsolutePath   string
	NormalizedPath string
	Content        string
	ContentSHA256  string
}

func ResolveCanonicalSkillSourceState ¶

func ResolveCanonicalSkillSourceState(skillID string, sourcePath string) (*CanonicalSkillSourceState, error)

func ResolveWritableSkillSourceState ¶

func ResolveWritableSkillSourceState(skillID string, sourcePath string) (*CanonicalSkillSourceState, error)

type CheckpointArtifact ¶

type CheckpointArtifact struct {
	RunID       string                 `json:"run_id"`
	GroupItemID string                 `json:"group_item_id,omitempty"`
	Artifact    ArtifactRef            `json:"artifact"`
	Payload     map[string]interface{} `json:"payload,omitempty"`
}

type CompareEvalRunRequest ¶

type CompareEvalRunRequest struct {
	BaseEvalRunID string `json:"base_eval_run_id,omitempty"`
	BaselineID    string `json:"baseline_id,omitempty"`
}

type ComparisonCaseDelta ¶

type ComparisonCaseDelta struct {
	Key                 string  `json:"key"`
	Label               string  `json:"label,omitempty"`
	ItemIndex           int     `json:"item_index"`
	Profile             string  `json:"profile,omitempty"`
	BaseVerdict         string  `json:"base_verdict,omitempty"`
	TargetVerdict       string  `json:"target_verdict,omitempty"`
	BaseStatus          string  `json:"base_status,omitempty"`
	TargetStatus        string  `json:"target_status,omitempty"`
	BaseScore           float64 `json:"base_score,omitempty"`
	TargetScore         float64 `json:"target_score,omitempty"`
	DeltaScore          float64 `json:"delta_score,omitempty"`
	BaseRunID           string  `json:"base_run_id,omitempty"`
	TargetRunID         string  `json:"target_run_id,omitempty"`
	BaseReason          string  `json:"base_reason,omitempty"`
	TargetReason        string  `json:"target_reason,omitempty"`
	BaseFailureLabel    string  `json:"base_failure_label,omitempty"`
	TargetFailureLabel  string  `json:"target_failure_label,omitempty"`
	BaseVerification    string  `json:"base_verification,omitempty"`
	TargetVerification  string  `json:"target_verification,omitempty"`
	BaseEvidenceScore   float64 `json:"base_evidence_score,omitempty"`
	TargetEvidenceScore float64 `json:"target_evidence_score,omitempty"`
}

type ComparisonReport ¶

type ComparisonReport struct {
	ID              string                 `json:"id"`
	OwnerUserID     string                 `json:"owner_user_id,omitempty"`
	BaselineID      string                 `json:"baseline_id,omitempty"`
	EvalSpecID      string                 `json:"eval_spec_id"`
	BaseEvalRunID   string                 `json:"base_eval_run_id"`
	TargetEvalRunID string                 `json:"target_eval_run_id"`
	Summary         map[string]interface{} `json:"summary,omitempty"`
	Regressions     []ComparisonCaseDelta  `json:"regressions,omitempty"`
	Improvements    []ComparisonCaseDelta  `json:"improvements,omitempty"`
	ScorerDelta     map[string]interface{} `json:"scorer_delta,omitempty"`
	CreatedAt       time.Time              `json:"created_at"`
}

type Controller ¶

type Controller struct {
	// contains filtered or unexported fields
}

func NewController ¶

func NewController(store *SQLiteStore, resolver *PolicyResolver) *Controller

func (*Controller) AppendEvent ¶

func (c *Controller) AppendEvent(ctx context.Context, event RunEvent) error

func (*Controller) AttachArtifact ¶

func (c *Controller) AttachArtifact(ctx context.Context, ref ArtifactRef) error

func (*Controller) BuildEvolutionOverview ¶

func (c *Controller) BuildEvolutionOverview(
	ctx context.Context,
	skillID string,
	ownerUserID string,
) (*EvolutionOverview, error)

func (*Controller) BuildSkillEvolutionCaseDetail ¶

func (c *Controller) BuildSkillEvolutionCaseDetail(ctx context.Context, evolutionCase *SkillEvolutionCase) (*SkillEvolutionCaseDetail, error)

func (*Controller) Cancel ¶

func (c *Controller) Cancel(ctx context.Context, id string, reason string) error

func (*Controller) CancelEvalRun ¶

func (c *Controller) CancelEvalRun(ctx context.Context, id string, reason string) error

func (*Controller) CancelGroup ¶

func (c *Controller) CancelGroup(ctx context.Context, id string, reason string) error

func (*Controller) CompareEvalRun ¶

func (c *Controller) CompareEvalRun(ctx context.Context, targetEvalRunID string, req CompareEvalRunRequest) (*ComparisonReport, error)

func (*Controller) CreateBaseline ¶

func (c *Controller) CreateBaseline(ctx context.Context, spec BaselineSpec) (*Baseline, error)

func (*Controller) CreateDataset ¶

func (c *Controller) CreateDataset(ctx context.Context, spec DatasetSpec) (*Dataset, error)

func (*Controller) CreateDatasetVersion ¶

func (c *Controller) CreateDatasetVersion(ctx context.Context, datasetID string, spec DatasetVersionSpec) (*DatasetVersion, error)

func (*Controller) CreateEvalSpec ¶

func (c *Controller) CreateEvalSpec(ctx context.Context, spec EvalSpecSpec) (*EvalSpec, error)

func (*Controller) CreateSkillEvolutionCase ¶

func (c *Controller) CreateSkillEvolutionCase(ctx context.Context, evolutionCase SkillEvolutionCase) (*SkillEvolutionCase, error)

func (*Controller) CreateSkillRevision ¶

func (c *Controller) CreateSkillRevision(ctx context.Context, revision SkillRevision) (*SkillRevision, error)

func (*Controller) Delete ¶

func (c *Controller) Delete(ctx context.Context, id string) error

func (*Controller) EnsureBatch1ExecutionAssets ¶

func (c *Controller) EnsureBatch1ExecutionAssets(ctx context.Context, ownerUserID string) (*Batch1ExecutionAssets, error)

func (*Controller) EnsureDesktopChatSuccessRateAssets ¶

func (c *Controller) EnsureDesktopChatSuccessRateAssets(ctx context.Context, ownerUserID string) (*DesktopChatSuccessRateAssets, error)

func (*Controller) EnsureSelectorCuratedAssets ¶

func (c *Controller) EnsureSelectorCuratedAssets(ctx context.Context, ownerUserID string) (*SelectorCuratedAssets, error)

func (*Controller) EnsureSkillEvolutionCase ¶

func (c *Controller) EnsureSkillEvolutionCase(ctx context.Context, spec SkillEvolutionCaseSpec) (*SkillEvolutionCase, bool, error)

func (*Controller) EvaluateExecutionEquivalence ¶

func (c *Controller) EvaluateExecutionEquivalence(ctx context.Context, targetEvalRunID string, req ExecutionEquivalenceRequest) (*ExecutionEquivalenceReport, error)

func (*Controller) EvaluateSelectorGate ¶

func (c *Controller) EvaluateSelectorGate(ctx context.Context, targetEvalRunID string, req SelectorGateRequest) (*SelectorGateReport, error)

func (*Controller) EvaluateSkillCutoverBudgetGate ¶

func (c *Controller) EvaluateSkillCutoverBudgetGate(ctx context.Context, targetEvalRunID string, req SkillCutoverBudgetRequest) (*SkillCutoverBudgetReport, error)

func (*Controller) EvaluateSkillCutoverReadiness ¶

func (c *Controller) EvaluateSkillCutoverReadiness(ctx context.Context, req SkillCutoverReadinessRequest) (*SkillCutoverReadinessReport, error)

func (*Controller) ExecutionMiddlewares ¶

func (c *Controller) ExecutionMiddlewares() []ExecutionMiddleware

func (*Controller) FindRunByMetadata ¶

func (c *Controller) FindRunByMetadata(ctx context.Context, kind RunKind, key string, value string) (*Run, error)

func (*Controller) Get ¶

func (c *Controller) Get(ctx context.Context, id string) (*Run, error)

func (*Controller) GetComparisonReport ¶

func (c *Controller) GetComparisonReport(ctx context.Context, id string) (*ComparisonReport, error)

func (*Controller) GetDataset ¶

func (c *Controller) GetDataset(ctx context.Context, id string) (*Dataset, error)

func (*Controller) GetDatasetVersion ¶

func (c *Controller) GetDatasetVersion(ctx context.Context, id string) (*DatasetVersion, error)

func (*Controller) GetEvalRun ¶

func (c *Controller) GetEvalRun(ctx context.Context, id string) (*EvalRun, error)

func (*Controller) GetEvalRunReport ¶

func (c *Controller) GetEvalRunReport(ctx context.Context, id string) (*EvalRunReport, error)

func (*Controller) GetEvalSpec ¶

func (c *Controller) GetEvalSpec(ctx context.Context, id string) (*EvalSpec, error)

func (*Controller) GetGroup ¶

func (c *Controller) GetGroup(ctx context.Context, id string) (*RunGroup, error)

func (*Controller) GetGroupReport ¶

func (c *Controller) GetGroupReport(ctx context.Context, id string) (*RunGroupReport, error)

func (*Controller) GetRegisteredDriver ¶

func (c *Controller) GetRegisteredDriver(kind RunKind) Driver

func (*Controller) GetSkillEvolutionCase ¶

func (c *Controller) GetSkillEvolutionCase(ctx context.Context, id string) (*SkillEvolutionCase, error)

func (*Controller) GetSkillEvolutionCaseDetail ¶

func (c *Controller) GetSkillEvolutionCaseDetail(ctx context.Context, id string) (*SkillEvolutionCaseDetail, error)

func (*Controller) GetSkillRevision ¶

func (c *Controller) GetSkillRevision(ctx context.Context, id string) (*SkillRevision, error)

func (*Controller) GetStored ¶

func (c *Controller) GetStored(ctx context.Context, id string) (*Run, error)

func (*Controller) ImportDatasetBundle ¶

func (c *Controller) ImportDatasetBundle(ctx context.Context, req ImportDatasetBundleRequest) (*ImportDatasetBundleResult, error)

func (*Controller) List ¶

func (c *Controller) List(ctx context.Context, filter RunFilter) ([]Run, error)

func (*Controller) ListArtifacts ¶

func (c *Controller) ListArtifacts(ctx context.Context, runID string) ([]ArtifactRef, error)

func (*Controller) ListBaselines ¶

func (c *Controller) ListBaselines(ctx context.Context, filter BaselineFilter) ([]Baseline, error)

func (*Controller) ListDatasetVersions ¶

func (c *Controller) ListDatasetVersions(ctx context.Context, datasetID string, limit int) ([]DatasetVersion, error)

func (*Controller) ListDatasets ¶

func (c *Controller) ListDatasets(ctx context.Context, filter DatasetFilter) ([]Dataset, error)

func (*Controller) ListEvalRuns ¶

func (c *Controller) ListEvalRuns(ctx context.Context, filter EvalRunFilter) ([]EvalRun, error)

func (*Controller) ListEvalSpecs ¶

func (c *Controller) ListEvalSpecs(ctx context.Context, filter EvalSpecFilter) ([]EvalSpec, error)

func (*Controller) ListEvents ¶

func (c *Controller) ListEvents(ctx context.Context, runID string, limit int) ([]RunEvent, error)

func (*Controller) ListGroupItems ¶

func (c *Controller) ListGroupItems(ctx context.Context, groupID string) ([]RunGroupItem, error)

func (*Controller) ListGroups ¶

func (c *Controller) ListGroups(ctx context.Context, filter RunGroupFilter) ([]RunGroup, error)

func (*Controller) ListOne ¶

func (c *Controller) ListOne(ctx context.Context, id string) (*Run, error)

func (*Controller) ListSkillDecisionHistory ¶

func (c *Controller) ListSkillDecisionHistory(ctx context.Context, filter SkillDecisionHistoryFilter) ([]SkillDecisionHistoryEntry, error)

func (*Controller) ListSkillEvolutionCases ¶

func (c *Controller) ListSkillEvolutionCases(ctx context.Context, filter SkillEvolutionCaseFilter) ([]SkillEvolutionCase, error)

func (*Controller) ListSkillRevisions ¶

func (c *Controller) ListSkillRevisions(ctx context.Context, filter SkillRevisionFilter) ([]SkillRevision, error)

func (*Controller) OptimizeSkill ¶

func (c *Controller) OptimizeSkill(ctx context.Context, skillID string, req SkillOptimizeRequest) (*OptimizationTrigger, error)

func (*Controller) PerformAction ¶

func (c *Controller) PerformAction(ctx context.Context, id string, action string, input map[string]interface{}) (*Run, error)

func (*Controller) PerformGroupAction ¶

func (c *Controller) PerformGroupAction(ctx context.Context, id string, action string, input map[string]interface{}) (*RunGroup, error)

func (*Controller) PromoteGroup ¶

func (c *Controller) PromoteGroup(ctx context.Context, groupID string, spec GroupPromotionSpec) (*GroupPromotionResult, error)

func (*Controller) PromoteSkillRevision ¶

func (c *Controller) PromoteSkillRevision(ctx context.Context, revisionID string, decision SkillRevisionDecisionRequest) (*SkillPromoteResult, error)

func (*Controller) RegisterDriver ¶

func (c *Controller) RegisterDriver(driver Driver)

func (*Controller) RetryFailedGroup ¶

func (c *Controller) RetryFailedGroup(ctx context.Context, id string) (int, error)

func (*Controller) RollbackSkillRevision ¶

func (c *Controller) RollbackSkillRevision(ctx context.Context, revisionID string, decision SkillRevisionDecisionRequest) (*SkillPromoteResult, error)

func (*Controller) RunTraceSnapshot ¶

func (c *Controller) RunTraceSnapshot(ctx context.Context, runID string) (*RunTrace, error)

func (*Controller) SetJudgeEvaluator ¶

func (c *Controller) SetJudgeEvaluator(evaluator JudgeEvaluator)

func (*Controller) SetOptimizationTriggerer ¶

func (c *Controller) SetOptimizationTriggerer(triggerer OptimizationTriggerer)

func (*Controller) SetReflector ¶

func (c *Controller) SetReflector(reflector ProposalReflector)

func (*Controller) SetRunTraceProvider ¶

func (c *Controller) SetRunTraceProvider(provider RunTraceProvider)

func (*Controller) SetRuntimeReflectionCoordinator ¶

func (c *Controller) SetRuntimeReflectionCoordinator(coordinator *RuntimeReflectionCoordinator)

func (*Controller) SpawnChild ¶

func (c *Controller) SpawnChild(ctx context.Context, parentID string, spec RunSpec) (*Run, error)

func (*Controller) Submit ¶

func (c *Controller) Submit(ctx context.Context, spec RunSpec) (*Run, error)

func (*Controller) SubmitEvalRun ¶

func (c *Controller) SubmitEvalRun(ctx context.Context, spec EvalRunSpec) (*EvalRun, error)

func (*Controller) SubmitGroup ¶

func (c *Controller) SubmitGroup(ctx context.Context, spec RunGroupSpec) (*RunGroup, error)

func (*Controller) SyncSnapshot ¶

func (c *Controller) SyncSnapshot(ctx context.Context, snapshot *Run) error

func (*Controller) UpdateSkillEvolutionCase ¶

func (c *Controller) UpdateSkillEvolutionCase(ctx context.Context, evolutionCase SkillEvolutionCase) (*SkillEvolutionCase, error)

func (*Controller) UpdateSkillRevision ¶

func (c *Controller) UpdateSkillRevision(ctx context.Context, revision SkillRevision) (*SkillRevision, error)

func (*Controller) UseExecutionMiddleware ¶

func (c *Controller) UseExecutionMiddleware(mw ExecutionMiddleware)

type Dataset ¶

type Dataset struct {
	ID              string                 `json:"id"`
	Name            string                 `json:"name"`
	Description     string                 `json:"description,omitempty"`
	OwnerUserID     string                 `json:"owner_user_id,omitempty"`
	Subject         string                 `json:"subject,omitempty"`
	DefaultRunKind  RunKind                `json:"default_run_kind,omitempty"`
	DefaultProfile  string                 `json:"default_profile,omitempty"`
	ActiveVersionID string                 `json:"active_version_id,omitempty"`
	Metadata        map[string]interface{} `json:"metadata,omitempty"`
	CreatedAt       time.Time              `json:"created_at"`
	UpdatedAt       time.Time              `json:"updated_at"`
}

type DatasetBundleSourcePreview ¶

type DatasetBundleSourcePreview struct {
	SourceType string                        `json:"source_type,omitempty"`
	SourceRef  string                        `json:"source_ref,omitempty"`
	Dataset    DatasetSpec                   `json:"dataset"`
	Version    DatasetBundleVersionPreview   `json:"version"`
	EvalSpecs  []ImportDatasetBundleEvalSpec `json:"eval_specs,omitempty"`
	MakeActive bool                          `json:"make_active,omitempty"`
}

type DatasetBundleVersionPreview ¶

type DatasetBundleVersionPreview struct {
	Version        string `json:"version,omitempty"`
	ItemCount      int    `json:"item_count"`
	ManifestSHA256 string `json:"manifest_sha256,omitempty"`
	SourceType     string `json:"source_type,omitempty"`
	SourceRef      string `json:"source_ref,omitempty"`
}

type DatasetFilter ¶

type DatasetFilter struct {
	OwnerUserID string
	Limit       int
}

type DatasetManifest ¶

type DatasetManifest struct {
	Dataset  DatasetManifestMeta     `json:"dataset,omitempty"`
	Defaults DatasetManifestDefaults `json:"defaults,omitempty"`
	Items    []DatasetManifestItem   `json:"items,omitempty"`
}

func Batch1ExecutionDatasetManifest ¶

func Batch1ExecutionDatasetManifest() DatasetManifest

func CUAOSWorldMacBenchmarkManifest ¶

func CUAOSWorldMacBenchmarkManifest() DatasetManifest

func DesktopChatSuccessRateDatasetManifest ¶

func DesktopChatSuccessRateDatasetManifest() DatasetManifest

func SelectorCuratedDatasetManifest ¶

func SelectorCuratedDatasetManifest() DatasetManifest

SelectorCuratedDatasetManifest returns the built-in selector routing dataset that freezes curated dry-run expectations into a reusable Harness manifest.

type DatasetManifestDefaults ¶

type DatasetManifestDefaults struct {
	RunKind       RunKind                `json:"run_kind,omitempty"`
	Profile       string                 `json:"profile,omitempty"`
	Scheduler     GroupSchedulerConfig   `json:"scheduler,omitempty"`
	Scoring       GroupScoringConfig     `json:"scoring,omitempty"`
	RuntimePolicy map[string]interface{} `json:"runtime_policy,omitempty"`
}

type DatasetManifestItem ¶

type DatasetManifestItem struct {
	ID       string                 `json:"id,omitempty"`
	RunKind  RunKind                `json:"run_kind,omitempty"`
	Profile  string                 `json:"profile,omitempty"`
	Input    map[string]interface{} `json:"input,omitempty"`
	Expected map[string]interface{} `json:"expected,omitempty"`
	Metadata map[string]interface{} `json:"metadata,omitempty"`
}

type DatasetManifestMeta ¶

type DatasetManifestMeta struct {
	Name    string `json:"name,omitempty"`
	Subject string `json:"subject,omitempty"`
}

type DatasetSpec ¶

type DatasetSpec struct {
	Name           string                 `json:"name"`
	Description    string                 `json:"description,omitempty"`
	OwnerUserID    string                 `json:"owner_user_id,omitempty"`
	Subject        string                 `json:"subject,omitempty"`
	DefaultRunKind RunKind                `json:"default_run_kind,omitempty"`
	DefaultProfile string                 `json:"default_profile,omitempty"`
	Metadata       map[string]interface{} `json:"metadata,omitempty"`
}

func Batch1ExecutionDatasetSpec ¶

func Batch1ExecutionDatasetSpec(ownerUserID string) DatasetSpec

func CUAOSWorldMacBenchmarkDatasetSpec ¶

func CUAOSWorldMacBenchmarkDatasetSpec(ownerUserID string) DatasetSpec

func DesktopChatSuccessRateDatasetSpec ¶

func DesktopChatSuccessRateDatasetSpec(ownerUserID string) DatasetSpec

func SelectorCuratedDatasetSpec ¶

func SelectorCuratedDatasetSpec(ownerUserID string) DatasetSpec

SelectorCuratedDatasetSpec returns the reusable DatasetSpec — the dataset-level shell, without the manifest items themselves — for the built-in selector routing manifest.

type DatasetVersion ¶

type DatasetVersion struct {
	ID             string                 `json:"id"`
	DatasetID      string                 `json:"dataset_id"`
	Version        string                 `json:"version"`
	ManifestSHA256 string                 `json:"manifest_sha256,omitempty"`
	ItemCount      int                    `json:"item_count"`
	SourceType     string                 `json:"source_type,omitempty"`
	SourceRef      string                 `json:"source_ref,omitempty"`
	Manifest       map[string]interface{} `json:"manifest,omitempty"`
	Metadata       map[string]interface{} `json:"metadata,omitempty"`
	CreatedBy      string                 `json:"created_by,omitempty"`
	CreatedAt      time.Time              `json:"created_at"`
}

func (*DatasetVersion) DecodeManifest ¶

func (v *DatasetVersion) DecodeManifest() (*DatasetManifest, error)

type DatasetVersionSpec ¶

type DatasetVersionSpec struct {
	Version    string                 `json:"version,omitempty"`
	SourceType string                 `json:"source_type,omitempty"`
	SourceRef  string                 `json:"source_ref,omitempty"`
	Manifest   map[string]interface{} `json:"manifest"`
	Metadata   map[string]interface{} `json:"metadata,omitempty"`
	CreatedBy  string                 `json:"created_by,omitempty"`
}

func Batch1ExecutionDatasetVersionSpec ¶

func Batch1ExecutionDatasetVersionSpec(createdBy string) (DatasetVersionSpec, error)

func CUAOSWorldMacBenchmarkDatasetVersionSpec ¶

func CUAOSWorldMacBenchmarkDatasetVersionSpec(createdBy string) (DatasetVersionSpec, error)

func DesktopChatSuccessRateDatasetVersionSpec ¶

func DesktopChatSuccessRateDatasetVersionSpec(createdBy string) (DatasetVersionSpec, error)

func SelectorCuratedDatasetVersionSpec ¶

func SelectorCuratedDatasetVersionSpec(createdBy string) (DatasetVersionSpec, error)

SelectorCuratedDatasetVersionSpec freezes the current built-in selector manifest into a versioned DatasetVersionSpec.

type Defaults ¶

// Defaults groups package-wide default settings: two path-like strings, an
// ApprovalMode, a sandbox mode string, a duration limit, four integer limits
// and a list of protected prefixes.
//
// The fields carry no struct tags, so the type declares no JSON field names.
// NOTE(review): how a zero value is interpreted for the Max* limits
// ("unlimited" versus "use a built-in default") is not visible from this
// declaration — confirm against the consumer.
type Defaults struct {
	ArtifactRoot      string
	StorePath         string
	ApprovalMode      ApprovalMode
	SandboxMode       string
	MaxDuration       time.Duration
	MaxSteps          int
	MaxToolRounds     int
	MaxSubagents      int
	MaxDepth          int
	ProtectedPrefixes []string
}

type DesktopChatSuccessRateAssets ¶

// DesktopChatSuccessRateAssets bundles pointers to a Dataset, a
// DatasetVersion and an EvalSpec. Each pointer is omitted from the JSON
// encoding when nil. The field set and tags are identical to
// GroupPromotionResult.
type DesktopChatSuccessRateAssets struct {
	Dataset        *Dataset        `json:"dataset,omitempty"`
	DatasetVersion *DatasetVersion `json:"dataset_version,omitempty"`
	EvalSpec       *EvalSpec       `json:"eval_spec,omitempty"`
}

type DesktopChatSuccessRateGateCheck ¶

// DesktopChatSuccessRateGateCheck is the element type of
// DesktopChatSuccessRateGateReport.Checks: a named check with a pass flag and
// optional actual/expected values.
type DesktopChatSuccessRateGateCheck struct {
	Name     string      `json:"name"`
	// Passed has no omitempty, so false is encoded explicitly.
	Passed   bool        `json:"passed"`
	// Actual and Expected are untyped; a nil interface value is omitted.
	Actual   interface{} `json:"actual,omitempty"`
	Expected interface{} `json:"expected,omitempty"`
}

type DesktopChatSuccessRateGateReport ¶

// DesktopChatSuccessRateGateReport is the value returned by
// EvaluateDesktopChatSuccessRateGate.
//
// All counters, PassRate and both boolean fields are always encoded (no
// omitempty); FailureLabelCounts and Checks are dropped when empty.
type DesktopChatSuccessRateGateReport struct {
	CaseCount                       int                               `json:"case_count"`
	PassedCount                     int                               `json:"passed_count"`
	PassRate                        float64                           `json:"pass_rate"`
	UnsafeSendCount                 int                               `json:"unsafe_send_count"`
	TypedBodyIntoSearchFieldCount   int                               `json:"typed_body_into_search_field_count"`
	// NOTE(review): EvaluateDesktopChatSuccessRateGate takes a parameter named
	// existingFocusedRegressionPassed; presumably it is copied here — confirm.
	ExistingFocusedRegressionPassed bool                              `json:"existing_focused_regression_passed"`
	FailureLabelCounts              map[string]int                    `json:"failure_label_counts,omitempty"`
	Checks                          []DesktopChatSuccessRateGateCheck `json:"checks,omitempty"`
	// NOTE(review): presumably the overall gate verdict derived from Checks —
	// the aggregation rule is not visible here.
	Passed                          bool                              `json:"passed"`
}

func EvaluateDesktopChatSuccessRateGate ¶

func EvaluateDesktopChatSuccessRateGate(summary map[string]interface{}, existingFocusedRegressionPassed bool) DesktopChatSuccessRateGateReport

type Driver ¶

// Driver is the four-method contract for a run driver: it names a RunKind,
// checks a RunSpec, and starts and cancels a *Run.
// NOTE(review): how drivers are registered and selected by Kind is not
// visible from this declaration.
type Driver interface {
	// Kind returns the RunKind associated with the driver.
	Kind() RunKind
	// Validate receives the RunSpec by value and reports its result as an error.
	Validate(spec RunSpec) error
	// Start receives the run and a RunEnv value and returns an error.
	// NOTE(review): whether Start blocks until the run finishes or returns
	// once the run is launched is not visible here — confirm per implementation.
	Start(ctx context.Context, run *Run, env RunEnv) error
	// Cancel receives the run to cancel and returns an error.
	Cancel(ctx context.Context, run *Run) error
}

type EvalRun ¶

// EvalRun is the JSON-serializable record of one evaluation run.
//
// ID, EvalSpecID, GroupID, Status, CreatedAt and UpdatedAt are always encoded;
// the other fields are dropped when empty.
type EvalRun struct {
	ID                string                 `json:"id"`
	EvalSpecID        string                 `json:"eval_spec_id"`
	GroupID           string                 `json:"group_id"`
	DatasetVersionID  string                 `json:"dataset_version_id,omitempty"`
	BaselineEvalRunID string                 `json:"baseline_eval_run_id,omitempty"`
	Title             string                 `json:"title,omitempty"`
	OwnerUserID       string                 `json:"owner_user_id,omitempty"`
	// Status uses RunGroupStatus, the same type EvalRunFilter.Statuses filters on.
	Status            RunGroupStatus         `json:"status"`
	TriggerKind       string                 `json:"trigger_kind,omitempty"`
	TriggerRef        string                 `json:"trigger_ref,omitempty"`
	Metadata          map[string]interface{} `json:"metadata,omitempty"`
	Summary           map[string]interface{} `json:"summary,omitempty"`
	CreatedAt         time.Time              `json:"created_at"`
	UpdatedAt         time.Time              `json:"updated_at"`
	// StartedAt and FinishedAt are pointers so that a nil value is omitted
	// from the encoding instead of being written as the zero time.
	StartedAt         *time.Time             `json:"started_at,omitempty"`
	FinishedAt        *time.Time             `json:"finished_at,omitempty"`
}

type EvalRunFilter ¶

// EvalRunFilter holds selection criteria for eval runs: an owner, an eval
// spec ID, a set of RunGroupStatus values and a limit. It carries no struct
// tags.
// NOTE(review): presumably zero-valued fields mean "do not filter on this"
// and Limit <= 0 means "no limit" — confirm against the store query.
type EvalRunFilter struct {
	OwnerUserID string
	EvalSpecID  string
	Statuses    []RunGroupStatus
	Limit       int
}

type EvalRunReport ¶

// EvalRunReport aggregates an EvalRun with pointers to its EvalSpec, Dataset,
// DatasetVersion and RunGroupReport.
type EvalRunReport struct {
	// EvalRun has no omitempty: a nil pointer is encoded as JSON null.
	EvalRun        *EvalRun        `json:"eval_run"`
	// The remaining pointers are omitted from the encoding when nil.
	EvalSpec       *EvalSpec       `json:"eval_spec,omitempty"`
	Dataset        *Dataset        `json:"dataset,omitempty"`
	DatasetVersion *DatasetVersion `json:"dataset_version,omitempty"`
	GroupReport    *RunGroupReport `json:"group_report,omitempty"`
}

type EvalRunSpec ¶

// EvalRunSpec is the input shape for an eval run. Every field here has a
// field of the same name, type and JSON key on EvalRun; the identifier,
// status and timestamp fields of EvalRun have no counterpart in the spec.
type EvalRunSpec struct {
	// EvalSpecID is the only field without omitempty and is always encoded.
	EvalSpecID        string                 `json:"eval_spec_id"`
	BaselineEvalRunID string                 `json:"baseline_eval_run_id,omitempty"`
	Title             string                 `json:"title,omitempty"`
	OwnerUserID       string                 `json:"owner_user_id,omitempty"`
	TriggerKind       string                 `json:"trigger_kind,omitempty"`
	TriggerRef        string                 `json:"trigger_ref,omitempty"`
	Metadata          map[string]interface{} `json:"metadata,omitempty"`
}

type EvalSpec ¶

// EvalSpec is the JSON-serializable record of an evaluation specification.
//
// ID, Name, RunKind, CreatedAt and UpdatedAt are always encoded.
type EvalSpec struct {
	ID               string                 `json:"id"`
	Name             string                 `json:"name"`
	OwnerUserID      string                 `json:"owner_user_id,omitempty"`
	Subject          string                 `json:"subject,omitempty"`
	RunKind          RunKind                `json:"run_kind"`
	Profile          string                 `json:"profile,omitempty"`
	DatasetID        string                 `json:"dataset_id,omitempty"`
	DatasetVersionID string                 `json:"dataset_version_id,omitempty"`
	// SchedulerConfig and ScoringConfig are struct values. encoding/json never
	// treats a struct as empty, so omitempty has no effect on them: both keys
	// are always emitted (as {} when all inner fields are omitted).
	// Note the keys here are "scheduler_config"/"scoring_config", whereas
	// EvalSpecSpec uses "scheduler"/"scoring" for the same types.
	SchedulerConfig  GroupSchedulerConfig   `json:"scheduler_config,omitempty"`
	ScoringConfig    GroupScoringConfig     `json:"scoring_config,omitempty"`
	RuntimePolicy    map[string]interface{} `json:"runtime_policy,omitempty"`
	Metadata         map[string]interface{} `json:"metadata,omitempty"`
	CreatedAt        time.Time              `json:"created_at"`
	UpdatedAt        time.Time              `json:"updated_at"`
}

type EvalSpecFilter ¶

// EvalSpecFilter holds selection criteria for eval specs: an owner, a dataset
// ID and a limit. It carries no struct tags.
// NOTE(review): presumably empty strings mean "do not filter" and Limit <= 0
// means "no limit" — confirm against the store query.
type EvalSpecFilter struct {
	OwnerUserID string
	DatasetID   string
	Limit       int
}

type EvalSpecSpec ¶

// EvalSpecSpec is the input shape for an eval spec. It is the value returned
// by the *EvalSpecSpec template functions of this package.
//
// Name, DatasetID, SchedulerConfig and ScoringConfig are always encoded.
type EvalSpecSpec struct {
	Name             string                 `json:"name"`
	OwnerUserID      string                 `json:"owner_user_id,omitempty"`
	Subject          string                 `json:"subject,omitempty"`
	RunKind          RunKind                `json:"run_kind,omitempty"`
	Profile          string                 `json:"profile,omitempty"`
	DatasetID        string                 `json:"dataset_id"`
	DatasetVersionID string                 `json:"dataset_version_id,omitempty"`
	// The JSON keys are "scheduler" and "scoring" here, but "scheduler_config"
	// and "scoring_config" on EvalSpec: a payload shaped like an encoded
	// EvalSpec will not populate these two fields when decoded into an
	// EvalSpecSpec.
	SchedulerConfig  GroupSchedulerConfig   `json:"scheduler"`
	ScoringConfig    GroupScoringConfig     `json:"scoring"`
	RuntimePolicy    map[string]interface{} `json:"runtime_policy,omitempty"`
	Metadata         map[string]interface{} `json:"metadata,omitempty"`
}

func Batch1ExecutionEvalSpecSpec ¶

func Batch1ExecutionEvalSpecSpec(datasetID, datasetVersionID, ownerUserID string) EvalSpecSpec

func CUAOSWorldMacBenchmarkEvalSpecSpec ¶

func CUAOSWorldMacBenchmarkEvalSpecSpec(datasetID, datasetVersionID, ownerUserID string) EvalSpecSpec

func DesktopChatSuccessRateEvalSpecSpec ¶

func DesktopChatSuccessRateEvalSpecSpec(datasetID, datasetVersionID, ownerUserID string) EvalSpecSpec

func SelectorCuratedEvalSpecSpec ¶

func SelectorCuratedEvalSpecSpec(datasetID, datasetVersionID, ownerUserID string) EvalSpecSpec

SelectorCuratedEvalSpecSpec provides a reusable EvalSpec template wired to the selector dry-run contract. The concrete dataset and dataset-version IDs must be supplied by the caller.

type EvolutionOverview ¶

// EvolutionOverview pairs an optional skill ID with two nested count
// structs. Revisions and Instructions are struct values without omitempty and
// are always present in the JSON encoding.
type EvolutionOverview struct {
	SkillID      string                             `json:"skill_id,omitempty"`
	Revisions    EvolutionOverviewRevisionCounts    `json:"revisions"`
	Instructions EvolutionOverviewInstructionCounts `json:"instructions"`
}

type EvolutionOverviewInstructionCounts ¶

// EvolutionOverviewInstructionCounts is the type of
// EvolutionOverview.Instructions. Pending is always encoded, including when
// it is zero.
type EvolutionOverviewInstructionCounts struct {
	Pending int `json:"pending"`
}

type EvolutionOverviewRevisionCounts ¶

// EvolutionOverviewRevisionCounts is the type of EvolutionOverview.Revisions.
// Accepted is always encoded, including when it is zero.
type EvolutionOverviewRevisionCounts struct {
	Accepted int `json:"accepted"`
}

type EvolutionProposalSummaryProvider ¶

// EvolutionProposalSummaryProvider is a single-method interface supplying a
// pending-proposal count. A Handler accepts one through
// SetEvolutionProposalSummaryProvider.
type EvolutionProposalSummaryProvider interface {
	// PendingProposalCount returns a count for the given owner user ID, or an
	// error.
	PendingProposalCount(ctx context.Context, ownerUserID string) (int, error)
}

type ExecPathGuard ¶

// ExecPathGuard is an opaque type built by NewExecPathGuard from a
// *Controller. Its CheckExecPath and CheckExecWorkdir methods each take a
// context and a path argument (named absPath / absWorkdir) and return an
// error.
// NOTE(review): the parameter names suggest callers must pass absolute paths;
// the rules applied to them are not visible here.
type ExecPathGuard struct {
	// contains filtered or unexported fields
}

func NewExecPathGuard ¶

func NewExecPathGuard(manager *Controller) *ExecPathGuard

func (*ExecPathGuard) CheckExecPath ¶

func (g *ExecPathGuard) CheckExecPath(ctx context.Context, absPath string) error

func (*ExecPathGuard) CheckExecWorkdir ¶

func (g *ExecPathGuard) CheckExecWorkdir(ctx context.Context, absWorkdir string) error

type ExecutionEquivalenceCheck ¶

// ExecutionEquivalenceCheck is the element type of
// ExecutionEquivalenceReport.Checks. It has the same Name/Passed/Actual/
// Expected fields and tags as DesktopChatSuccessRateGateCheck, plus an
// optional Details map.
type ExecutionEquivalenceCheck struct {
	Name     string                 `json:"name"`
	Passed   bool                   `json:"passed"`
	Actual   interface{}            `json:"actual,omitempty"`
	Expected interface{}            `json:"expected,omitempty"`
	Details  map[string]interface{} `json:"details,omitempty"`
}

type ExecutionEquivalenceMetrics ¶

// ExecutionEquivalenceMetrics is the type of ExecutionEquivalenceReport.Metrics.
//
// Every scalar field is encoded unconditionally, including zeros; only the
// two breakdown maps are dropped when empty.
// NOTE(review): the *Delta fields presumably hold target minus base; the sign
// convention is not visible from this declaration — confirm before comparing
// them against thresholds.
type ExecutionEquivalenceMetrics struct {
	CaseCount                    int                                           `json:"case_count"`
	PassedCount                  int                                           `json:"passed_count"`
	PassRate                     float64                                       `json:"pass_rate"`
	CriticalCaseCount            int                                           `json:"critical_case_count"`
	CriticalPassedCount          int                                           `json:"critical_passed_count"`
	CriticalPassRate             float64                                       `json:"critical_pass_rate"`
	InfraBlockedCount            int                                           `json:"infra_blocked_count"`
	BaseInfraBlockedCount        int                                           `json:"base_infra_blocked_count"`
	TargetInfraBlockedCount      int                                           `json:"target_infra_blocked_count"`
	InfraBlockedDelta            int                                           `json:"infra_blocked_delta"`
	BasePassRate                 float64                                       `json:"base_pass_rate"`
	TargetPassRate               float64                                       `json:"target_pass_rate"`
	PassRateDelta                float64                                       `json:"pass_rate_delta"`
	BaseVerificationPassRate     float64                                       `json:"base_verification_pass_rate"`
	TargetVerificationPassRate   float64                                       `json:"target_verification_pass_rate"`
	VerificationPassRateDelta    float64                                       `json:"verification_pass_rate_delta"`
	BaseEvidenceBackedPassRate   float64                                       `json:"base_evidence_backed_pass_rate"`
	TargetEvidenceBackedPassRate float64                                       `json:"target_evidence_backed_pass_rate"`
	EvidenceBackedPassRateDelta  float64                                       `json:"evidence_backed_pass_rate_delta"`
	RegressionCount              int                                           `json:"regression_count"`
	ImprovementCount             int                                           `json:"improvement_count"`
	NewFailureCount              int                                           `json:"new_failure_count"`
	ResolvedFailureCount         int                                           `json:"resolved_failure_count"`
	CriticalRegressionCount      int                                           `json:"critical_regression_count"`
	// Both breakdowns map a string key to an ExecutionEquivalenceSegmentMetrics
	// value. NOTE(review): presumably keyed by locale and by primary route
	// name respectively — confirm against the producer.
	LocaleBreakdown              map[string]ExecutionEquivalenceSegmentMetrics `json:"locale_breakdown,omitempty"`
	PrimaryRouteBreakdown        map[string]ExecutionEquivalenceSegmentMetrics `json:"primary_route_breakdown,omitempty"`
}

type ExecutionEquivalenceReport ¶

// ExecutionEquivalenceReport is the JSON-serializable outcome of an execution
// equivalence evaluation: the run IDs involved, the computed Metrics, the
// individual Checks and an overall Passed flag.
type ExecutionEquivalenceReport struct {
	TargetEvalRunID    string                      `json:"target_eval_run_id"`
	BaseEvalRunID      string                      `json:"base_eval_run_id,omitempty"`
	BaselineID         string                      `json:"baseline_id,omitempty"`
	ComparisonReportID string                      `json:"comparison_report_id,omitempty"`
	Metrics            ExecutionEquivalenceMetrics `json:"metrics"`
	// Thresholds is an untyped map here, whereas ExecutionEquivalenceRequest
	// carries the typed ExecutionEquivalenceThresholds struct under the same
	// "thresholds" key.
	Thresholds         map[string]interface{}      `json:"thresholds,omitempty"`
	Checks             []ExecutionEquivalenceCheck `json:"checks,omitempty"`
	Passed             bool                        `json:"passed"`
	CreatedAt          time.Time                   `json:"created_at"`
}

type ExecutionEquivalenceRequest ¶

// ExecutionEquivalenceRequest is the input shape for an execution equivalence
// evaluation: an optional base eval run ID, an optional baseline ID and a
// set of thresholds.
// NOTE(review): whether BaseEvalRunID and BaselineID are mutually exclusive,
// and which wins when both are set, is not visible here.
type ExecutionEquivalenceRequest struct {
	BaseEvalRunID string                         `json:"base_eval_run_id,omitempty"`
	BaselineID    string                         `json:"baseline_id,omitempty"`
	// Thresholds is a struct value, so omitempty has no effect: the key is
	// always emitted, as {} when every pointer inside it is nil.
	Thresholds    ExecutionEquivalenceThresholds `json:"thresholds,omitempty"`
}

type ExecutionEquivalenceSegmentMetrics ¶

// ExecutionEquivalenceSegmentMetrics is the per-segment value type of the
// LocaleBreakdown and PrimaryRouteBreakdown maps on
// ExecutionEquivalenceMetrics. Each field here also exists, with the same
// name, type and JSON key, on ExecutionEquivalenceMetrics. All fields are
// encoded unconditionally.
type ExecutionEquivalenceSegmentMetrics struct {
	CaseCount               int     `json:"case_count"`
	PassedCount             int     `json:"passed_count"`
	PassRate                float64 `json:"pass_rate"`
	CriticalCaseCount       int     `json:"critical_case_count"`
	CriticalPassedCount     int     `json:"critical_passed_count"`
	CriticalPassRate        float64 `json:"critical_pass_rate"`
	InfraBlockedCount       int     `json:"infra_blocked_count"`
	RegressionCount         int     `json:"regression_count"`
	ImprovementCount        int     `json:"improvement_count"`
	NewFailureCount         int     `json:"new_failure_count"`
	ResolvedFailureCount    int     `json:"resolved_failure_count"`
	CriticalRegressionCount int     `json:"critical_regression_count"`
}

type ExecutionEquivalenceThresholds ¶

// ExecutionEquivalenceThresholds holds four optional limits.
//
// Every field is a pointer: a nil pointer is omitted from the JSON encoding
// and stays nil when the key is absent on decode, while a pointer to zero is
// encoded as 0. This keeps "not set" distinguishable from an explicit zero.
// DefaultExecutionEquivalenceThresholds returns a value of this type.
// NOTE(review): how a nil threshold is treated by the evaluator (skipped, or
// replaced by a default) is not visible here.
type ExecutionEquivalenceThresholds struct {
	MaxPassRateDrop               *float64 `json:"max_pass_rate_drop,omitempty"`
	MaxCriticalRegressionCount    *int     `json:"max_critical_regression_count,omitempty"`
	MaxVerificationPassRateDrop   *float64 `json:"max_verification_pass_rate_drop,omitempty"`
	MaxEvidenceBackedPassRateDrop *float64 `json:"max_evidence_backed_pass_rate_drop,omitempty"`
}

func DefaultExecutionEquivalenceThresholds ¶

func DefaultExecutionEquivalenceThresholds() ExecutionEquivalenceThresholds

type ExecutionMiddleware ¶

// ExecutionMiddleware provides run-scoped lifecycle hooks around driver start.
type ExecutionMiddleware interface {
	// BeforeStart is the only hook that returns an error.
	// NOTE(review): presumably a non-nil error aborts the start — confirm
	// against the caller.
	BeforeStart(ctx context.Context, runCtx *RunContext) error
	// AfterStart has no return value.
	AfterStart(ctx context.Context, runCtx *RunContext)
	// OnStartError additionally receives the error from the failed start as
	// runErr; it has no return value.
	OnStartError(ctx context.Context, runCtx *RunContext, runErr error)
}

ExecutionMiddleware provides run-scoped lifecycle hooks around driver start.

func NewSkillCandidateMiddleware ¶

func NewSkillCandidateMiddleware() ExecutionMiddleware

type ExecutionMiddlewareHooks ¶

// ExecutionMiddlewareHooks is a small adapter for wiring hook functions.
//
// It has one function-typed field per ExecutionMiddleware method, with
// matching signatures, and declares BeforeStart, AfterStart and OnStartError
// on a value receiver, so an ExecutionMiddlewareHooks value has the
// ExecutionMiddleware method set.
// NOTE(review): presumably each method forwards to its *Func field and treats
// a nil field as a no-op — confirm against the method bodies.
type ExecutionMiddlewareHooks struct {
	BeforeStartFunc  func(ctx context.Context, runCtx *RunContext) error
	AfterStartFunc   func(ctx context.Context, runCtx *RunContext)
	OnStartErrorFunc func(ctx context.Context, runCtx *RunContext, runErr error)
}

ExecutionMiddlewareHooks is a small adapter for wiring hook functions.

func (ExecutionMiddlewareHooks) AfterStart ¶

func (h ExecutionMiddlewareHooks) AfterStart(ctx context.Context, runCtx *RunContext)

func (ExecutionMiddlewareHooks) BeforeStart ¶

func (h ExecutionMiddlewareHooks) BeforeStart(ctx context.Context, runCtx *RunContext) error

func (ExecutionMiddlewareHooks) OnStartError ¶

func (h ExecutionMiddlewareHooks) OnStartError(ctx context.Context, runCtx *RunContext, runErr error)

type GroupDispatcher ¶

// GroupDispatcher is an opaque type built by NewGroupDispatcher from a
// *Controller. It exposes DispatchOnce (returns an error), Start (no return
// value) and two setters taking a time.Duration: SetPollInterval and
// SetRunPollInterval.
// NOTE(review): whether Start blocks until ctx is cancelled or spawns its own
// goroutine is not visible here — confirm before calling it inline.
type GroupDispatcher struct {
	// contains filtered or unexported fields
}

func NewGroupDispatcher ¶

func NewGroupDispatcher(manager *Controller) *GroupDispatcher

func (*GroupDispatcher) DispatchOnce ¶

func (d *GroupDispatcher) DispatchOnce(ctx context.Context) error

func (*GroupDispatcher) SetPollInterval ¶

func (d *GroupDispatcher) SetPollInterval(interval time.Duration)

func (*GroupDispatcher) SetRunPollInterval ¶

func (d *GroupDispatcher) SetRunPollInterval(interval time.Duration)

func (*GroupDispatcher) Start ¶

func (d *GroupDispatcher) Start(ctx context.Context)

type GroupPromotionResult ¶

// GroupPromotionResult bundles pointers to a Dataset, a DatasetVersion and an
// EvalSpec. Each pointer is omitted from the JSON encoding when nil. The
// field set and tags are identical to DesktopChatSuccessRateAssets.
type GroupPromotionResult struct {
	Dataset        *Dataset        `json:"dataset,omitempty"`
	DatasetVersion *DatasetVersion `json:"dataset_version,omitempty"`
	EvalSpec       *EvalSpec       `json:"eval_spec,omitempty"`
}

type GroupPromotionSpec ¶

// GroupPromotionSpec is the input shape for promoting a group. DatasetName
// and EvalName have no omitempty and are always encoded; Description and
// Subject are dropped when empty.
type GroupPromotionSpec struct {
	DatasetName string `json:"dataset_name"`
	Description string `json:"description,omitempty"`
	Subject     string `json:"subject,omitempty"`
	EvalName    string `json:"eval_name"`
}

type GroupSchedulerConfig ¶

// GroupSchedulerConfig holds scheduler settings for a group. All fields are
// tagged omitempty, so zero values are dropped from the JSON encoding.
type GroupSchedulerConfig struct {
	MaxConcurrency int           `json:"max_concurrency,omitempty"`
	MaxAttempts    int           `json:"max_attempts,omitempty"`
	// LeaseTTL and RetryBackoff are time.Duration values. encoding/json
	// encodes them as plain integers counting nanoseconds, and expects the
	// same on decode — a string such as "30s" is rejected.
	LeaseTTL       time.Duration `json:"lease_ttl,omitempty"`
	RetryBackoff   time.Duration `json:"retry_backoff,omitempty"`
}

type GroupScoringConfig ¶

// GroupScoringConfig holds scoring settings for a group. All fields are
// tagged omitempty.
type GroupScoringConfig struct {
	Mode          ScoringMode `json:"mode,omitempty"`
	RuleProfile   string      `json:"rule_profile,omitempty"`
	JudgeModel    string      `json:"judge_model,omitempty"`
	// Because of omitempty, a threshold of exactly 0 is not written to JSON
	// and cannot be told apart from "not set" after a round trip.
	PassThreshold float64     `json:"pass_threshold,omitempty"`
}

type GuardPipelineError ¶

// GuardPipelineError normalizes preflight failures before a run is dispatched.
//
// The error interface is implemented on the pointer receiver
// (*GuardPipelineError), which also declares Unwrap, ToolRuntimeCode and
// ToolRuntimeDetails. Stage, Code and Message are always encoded in JSON;
// Details is dropped when empty.
type GuardPipelineError struct {
	Stage   RuntimeStage           `json:"stage"`
	Code    string                 `json:"code"`
	Message string                 `json:"message"`
	Details map[string]interface{} `json:"details,omitempty"`
	// contains filtered or unexported fields
}

GuardPipelineError normalizes preflight failures before a run is dispatched.

func (*GuardPipelineError) Error ¶

func (e *GuardPipelineError) Error() string

func (*GuardPipelineError) ToolRuntimeCode ¶

func (e *GuardPipelineError) ToolRuntimeCode() string

func (*GuardPipelineError) ToolRuntimeDetails ¶

func (e *GuardPipelineError) ToolRuntimeDetails() map[string]interface{}

func (*GuardPipelineError) Unwrap ¶

func (e *GuardPipelineError) Unwrap() error

type Handler ¶

// Handler is an opaque type built by NewHandler from a *Controller. Its
// endpoint methods all share the signature func(c echo.Context) error, and
// RegisterRoutes takes an *echo.Group. Two optional collaborators can be
// supplied through SetDetailProvider and SetEvolutionProposalSummaryProvider.
type Handler struct {
	// contains filtered or unexported fields
}

func NewHandler ¶

func NewHandler(manager *Controller) *Handler

func (*Handler) CancelEvalRun ¶

func (h *Handler) CancelEvalRun(c echo.Context) error

func (*Handler) CancelGroup ¶

func (h *Handler) CancelGroup(c echo.Context) error

func (*Handler) CancelRun ¶

func (h *Handler) CancelRun(c echo.Context) error

func (*Handler) CompareEvalRun ¶

func (h *Handler) CompareEvalRun(c echo.Context) error

func (*Handler) CreateBaseline ¶

func (h *Handler) CreateBaseline(c echo.Context) error

func (*Handler) CreateDataset ¶

func (h *Handler) CreateDataset(c echo.Context) error

func (*Handler) CreateDatasetVersion ¶

func (h *Handler) CreateDatasetVersion(c echo.Context) error

func (*Handler) CreateEvalRun ¶

func (h *Handler) CreateEvalRun(c echo.Context) error

func (*Handler) CreateEvalSpec ¶

func (h *Handler) CreateEvalSpec(c echo.Context) error

func (*Handler) CreateGroup ¶

func (h *Handler) CreateGroup(c echo.Context) error

func (*Handler) CreateRun ¶

func (h *Handler) CreateRun(c echo.Context) error

func (*Handler) EnsureBatch1ExecutionAssets ¶

func (h *Handler) EnsureBatch1ExecutionAssets(c echo.Context) error

func (*Handler) EnsureSelectorCuratedAssets ¶

func (h *Handler) EnsureSelectorCuratedAssets(c echo.Context) error

func (*Handler) EvaluateExecutionEquivalence ¶

func (h *Handler) EvaluateExecutionEquivalence(c echo.Context) error

func (*Handler) EvaluateSelectorGate ¶

func (h *Handler) EvaluateSelectorGate(c echo.Context) error

func (*Handler) EvaluateSkillCutoverBudgetGate ¶

func (h *Handler) EvaluateSkillCutoverBudgetGate(c echo.Context) error

func (*Handler) EvaluateSkillCutoverReadiness ¶

func (h *Handler) EvaluateSkillCutoverReadiness(c echo.Context) error

func (*Handler) GetComparisonReport ¶

func (h *Handler) GetComparisonReport(c echo.Context) error

func (*Handler) GetDataset ¶

func (h *Handler) GetDataset(c echo.Context) error

func (*Handler) GetDatasetVersion ¶

func (h *Handler) GetDatasetVersion(c echo.Context) error

func (*Handler) GetEvalRun ¶

func (h *Handler) GetEvalRun(c echo.Context) error

func (*Handler) GetEvalRunReport ¶

func (h *Handler) GetEvalRunReport(c echo.Context) error

func (*Handler) GetEvalSpec ¶

func (h *Handler) GetEvalSpec(c echo.Context) error

func (*Handler) GetEvolutionOverview ¶

func (h *Handler) GetEvolutionOverview(c echo.Context) error

func (*Handler) GetGroup ¶

func (h *Handler) GetGroup(c echo.Context) error

func (*Handler) GetGroupReport ¶

func (h *Handler) GetGroupReport(c echo.Context) error

func (*Handler) GetRun ¶

func (h *Handler) GetRun(c echo.Context) error

func (*Handler) GetRunDetail ¶

func (h *Handler) GetRunDetail(c echo.Context) error

func (*Handler) GetSkillEvolutionCase ¶

func (h *Handler) GetSkillEvolutionCase(c echo.Context) error

func (*Handler) GetSkillRevision ¶

func (h *Handler) GetSkillRevision(c echo.Context) error

func (*Handler) ImportDatasetBundle ¶

func (h *Handler) ImportDatasetBundle(c echo.Context) error

func (*Handler) ImportDatasetBundleFromSource ¶

func (h *Handler) ImportDatasetBundleFromSource(c echo.Context) error

func (*Handler) ListArtifacts ¶

func (h *Handler) ListArtifacts(c echo.Context) error

func (*Handler) ListBaselines ¶

func (h *Handler) ListBaselines(c echo.Context) error

func (*Handler) ListDatasetVersions ¶

func (h *Handler) ListDatasetVersions(c echo.Context) error

func (*Handler) ListDatasets ¶

func (h *Handler) ListDatasets(c echo.Context) error

func (*Handler) ListEvalRuns ¶

func (h *Handler) ListEvalRuns(c echo.Context) error

func (*Handler) ListEvalSpecs ¶

func (h *Handler) ListEvalSpecs(c echo.Context) error

func (*Handler) ListEvents ¶

func (h *Handler) ListEvents(c echo.Context) error

func (*Handler) ListGroupItems ¶

func (h *Handler) ListGroupItems(c echo.Context) error

func (*Handler) ListGroups ¶

func (h *Handler) ListGroups(c echo.Context) error

func (*Handler) ListRuns ¶

func (h *Handler) ListRuns(c echo.Context) error

func (*Handler) ListSkillDecisionHistory ¶

func (h *Handler) ListSkillDecisionHistory(c echo.Context) error

func (*Handler) ListSkillEvolutionCases ¶

func (h *Handler) ListSkillEvolutionCases(c echo.Context) error

func (*Handler) ListSkillRevisions ¶

func (h *Handler) ListSkillRevisions(c echo.Context) error

func (*Handler) OptimizeSkill ¶

func (h *Handler) OptimizeSkill(c echo.Context) error

func (*Handler) PerformGroupAction ¶

func (h *Handler) PerformGroupAction(c echo.Context) error

func (*Handler) PerformRunAction ¶

func (h *Handler) PerformRunAction(c echo.Context) error

func (*Handler) PreviewDatasetBundleFromSource ¶

func (h *Handler) PreviewDatasetBundleFromSource(c echo.Context) error

func (*Handler) PromoteGroup ¶

func (h *Handler) PromoteGroup(c echo.Context) error

func (*Handler) PromoteSkillRevision ¶

func (h *Handler) PromoteSkillRevision(c echo.Context) error

func (*Handler) RegisterRoutes ¶

func (h *Handler) RegisterRoutes(g *echo.Group)

func (*Handler) RetryFailedGroup ¶

func (h *Handler) RetryFailedGroup(c echo.Context) error

func (*Handler) RollbackSkillRevision ¶

func (h *Handler) RollbackSkillRevision(c echo.Context) error

func (*Handler) SetDetailProvider ¶

func (h *Handler) SetDetailProvider(provider RunDetailProvider)

func (*Handler) SetEvolutionProposalSummaryProvider ¶

func (h *Handler) SetEvolutionProposalSummaryProvider(provider EvolutionProposalSummaryProvider)

type HarnessAPICheck ¶

// HarnessAPICheck is the element type of HarnessContract.APIChecks. It is
// made of five strings, all tagged omitempty, so a zero value encodes as {}.
type HarnessAPICheck struct {
	Name          string `json:"name,omitempty"`
	Target        string `json:"target,omitempty"`
	Expectation   string `json:"expectation,omitempty"`
	RequiredCheck string `json:"required_check,omitempty"`
	FailureLabel  string `json:"failure_label,omitempty"`
}

type HarnessAdaptivePolicy ¶

// HarnessAdaptivePolicy is the value returned by DeriveHarnessAdaptivePolicy
// from a model name, a RunKind and a HarnessContract.
//
// All fields are tagged omitempty: a false flag or a zero count is not
// written to JSON, so a consumer reading the encoded form must treat a
// missing key as false / 0.
type HarnessAdaptivePolicy struct {
	Profile             string `json:"profile,omitempty"`
	Reason              string `json:"reason,omitempty"`
	EnableExternalQA    bool   `json:"enable_external_qa,omitempty"`
	EnableBrowserQA     bool   `json:"enable_browser_qa,omitempty"`
	EnableCheckpoints   bool   `json:"enable_checkpoints,omitempty"`
	MaxRecoveryAttempts int    `json:"max_recovery_attempts,omitempty"`
	// NOTE(review): the unit of CheckpointInterval (steps, tool rounds,
	// seconds, ...) is not visible from this declaration.
	CheckpointInterval  int    `json:"checkpoint_interval,omitempty"`
}

func DeriveHarnessAdaptivePolicy ¶

func DeriveHarnessAdaptivePolicy(model string, kind RunKind, contract HarnessContract) HarnessAdaptivePolicy

type HarnessBrowserCheck ¶

// HarnessBrowserCheck is the element type of HarnessContract.BrowserChecks.
// It shares the Name, Target, Expectation and FailureLabel fields (and their
// JSON keys) with HarnessAPICheck and adds observation, artifact and
// screenshot requirements. All fields are tagged omitempty.
type HarnessBrowserCheck struct {
	Name                string `json:"name,omitempty"`
	Target              string `json:"target,omitempty"`
	Expectation         string `json:"expectation,omitempty"`
	RequiredObservation string `json:"required_observation,omitempty"`
	RequiredArtifact    string `json:"required_artifact,omitempty"`
	FailureLabel        string `json:"failure_label,omitempty"`
	RequireScreenshot   bool   `json:"require_screenshot,omitempty"`
}

type HarnessCheckpoint ¶

// HarnessCheckpoint is the JSON-serializable snapshot of a run's progress:
// identifiers, a goal and summary, evidence/risk/failure-label lists, a
// follow-up HarnessContract and a creation time.
type HarnessCheckpoint struct {
	Version           string          `json:"version,omitempty"`
	RunID             string          `json:"run_id,omitempty"`
	GroupID           string          `json:"group_id,omitempty"`
	GroupItemID       string          `json:"group_item_id,omitempty"`
	// With omitempty, an AttemptIndex of 0 is not written to JSON.
	AttemptIndex      int             `json:"attempt_index,omitempty"`
	Goal              string          `json:"goal,omitempty"`
	Summary           string          `json:"summary,omitempty"`
	VerifiedEvidence  []string        `json:"verified_evidence,omitempty"`
	UnresolvedRisks   []string        `json:"unresolved_risks,omitempty"`
	FailureLabels     []string        `json:"failure_labels,omitempty"`
	// NextContract is a struct value, so omitempty has no effect: the key is
	// always emitted, as {} when the contract itself is empty.
	NextContract      HarnessContract `json:"next_contract,omitempty"`
	EvaluatorInput    string          `json:"evaluator_input,omitempty"`
	RecommendedResume string          `json:"recommended_resume,omitempty"`
	// CreatedAt has no omitempty and is always encoded.
	CreatedAt         time.Time       `json:"created_at"`
}

type HarnessContract ¶

// HarnessContract describes what a run is expected to deliver and how it is
// to be checked: string lists of deliverables, criteria, required/forbidden
// tool calls and observations, plus structured artifact, browser and API
// checks.
//
// Every field is tagged omitempty, so a zero-value contract encodes as {}.
// DecodeHarnessContract builds a value of this type from one or more
// map[string]interface{} sources.
// NOTE(review): how DecodeHarnessContract resolves a key present in several
// sources (first wins, last wins, merge) is not visible here.
type HarnessContract struct {
	Deliverables          []string                  `json:"deliverables,omitempty"`
	SuccessCriteria       []string                  `json:"success_criteria,omitempty"`
	ExpectedArtifacts     []HarnessExpectedArtifact `json:"expected_artifacts,omitempty"`
	RequiredToolCalls     []string                  `json:"required_tool_calls,omitempty"`
	ForbiddenToolCalls    []string                  `json:"forbidden_tool_calls,omitempty"`
	RequiredChecks        []string                  `json:"required_checks,omitempty"`
	RequiredObservations  []string                  `json:"required_observations,omitempty"`
	ForbiddenObservations []string                  `json:"forbidden_observations,omitempty"`
	BrowserChecks         []HarnessBrowserCheck     `json:"browser_checks,omitempty"`
	APIChecks             []HarnessAPICheck         `json:"api_checks,omitempty"`
	FallbackOrder         []string                  `json:"fallback_order,omitempty"`
	StopConditions        []string                  `json:"stop_conditions,omitempty"`
	EvaluatorHints        []string                  `json:"evaluator_hints,omitempty"`
	// NOTE(review): RiskLevel is a free-form string here; the accepted values
	// are not visible from this declaration.
	RiskLevel             string                    `json:"risk_level,omitempty"`
}

func DecodeHarnessContract ¶

func DecodeHarnessContract(sources ...map[string]interface{}) HarnessContract

type HarnessExpectedArtifact ¶

// HarnessExpectedArtifact is the element type of
// HarnessContract.ExpectedArtifacts: a path, a label and a must-exist flag.
type HarnessExpectedArtifact struct {
	Path      string `json:"path,omitempty"`
	Label     string `json:"label,omitempty"`
	// With omitempty, MustExist=false is not written to JSON; an absent key
	// decodes back to false.
	MustExist bool   `json:"must_exist,omitempty"`
}

type HarnessSubagentExecutor ¶

// HarnessSubagentExecutor is an opaque type built by NewSubagentExecutor (note
// that the constructor name omits the "Harness" prefix) from a *Controller and
// a *config.AgentsConfig. ExecuteIsolated and ExecuteSubagent share one
// signature: both take a tools.SubagentRequest and return a
// *tools.SubagentResult and an error.
// NOTE(review): the behavioural difference between the two methods is not
// visible from their signatures.
type HarnessSubagentExecutor struct {
	// contains filtered or unexported fields
}

func NewSubagentExecutor ¶

func NewSubagentExecutor(manager *Controller, agents *config.AgentsConfig) *HarnessSubagentExecutor

func (*HarnessSubagentExecutor) ExecuteIsolated ¶

func (e *HarnessSubagentExecutor) ExecuteIsolated(ctx context.Context, req tools.SubagentRequest) (*tools.SubagentResult, error)

func (*HarnessSubagentExecutor) ExecuteSubagent ¶

func (e *HarnessSubagentExecutor) ExecuteSubagent(ctx context.Context, req tools.SubagentRequest) (*tools.SubagentResult, error)

type HarnessVerificationResult ¶

// HarnessVerificationResult is the JSON-serializable outcome of a
// verification step; JudgeEvaluationRequest carries a pointer to one in its
// Verification field.
type HarnessVerificationResult struct {
	// Passed and Retryable have no omitempty and are always encoded.
	Passed         bool                     `json:"passed"`
	Retryable      bool                     `json:"retryable"`
	FailureLabel   string                   `json:"failure_label,omitempty"`
	Summary        string                   `json:"summary,omitempty"`
	// The three scores are tagged omitempty: a score of exactly 0 is not
	// written to JSON and is indistinguishable from "not scored" after a
	// round trip.
	// NOTE(review): the score range (0..1, 0..100, ...) is not visible here.
	OutcomeScore   float64                  `json:"outcome_score,omitempty"`
	EvidenceScore  float64                  `json:"evidence_score,omitempty"`
	ExecutionScore float64                  `json:"execution_score,omitempty"`
	Observations   []string                 `json:"observations,omitempty"`
	// Checks and Artifacts are lists of untyped maps; their key schema is not
	// declared by this type.
	Checks         []map[string]interface{} `json:"checks,omitempty"`
	Artifacts      []map[string]interface{} `json:"artifacts,omitempty"`
	TraceSummary   map[string]interface{}   `json:"trace_summary,omitempty"`
}

type ImportDatasetBundleEvalSpec ¶

// ImportDatasetBundleEvalSpec is the element type of
// ImportDatasetBundleRequest.EvalSpecs. It has the same fields as EvalSpecSpec
// except that DatasetID and DatasetVersionID are absent.
type ImportDatasetBundleEvalSpec struct {
	Name            string                 `json:"name"`
	OwnerUserID     string                 `json:"owner_user_id,omitempty"`
	Subject         string                 `json:"subject,omitempty"`
	RunKind         RunKind                `json:"run_kind,omitempty"`
	Profile         string                 `json:"profile,omitempty"`
	// Both config fields are struct values, so omitempty has no effect and the
	// "scheduler" and "scoring" keys are always emitted. The keys match
	// EvalSpecSpec, not EvalSpec ("scheduler_config"/"scoring_config").
	SchedulerConfig GroupSchedulerConfig   `json:"scheduler,omitempty"`
	ScoringConfig   GroupScoringConfig     `json:"scoring,omitempty"`
	RuntimePolicy   map[string]interface{} `json:"runtime_policy,omitempty"`
	Metadata        map[string]interface{} `json:"metadata,omitempty"`
}

type ImportDatasetBundleFromSourceRequest ¶

// ImportDatasetBundleFromSourceRequest is the input shape for importing a
// dataset bundle by reference rather than by inline payload.
// NOTE(review): the request has three location-like strings (Path, Source,
// BundlePath); which of them applies to which SourceType, and their
// precedence, is not visible from this declaration.
type ImportDatasetBundleFromSourceRequest struct {
	SourceType string `json:"source_type,omitempty"`
	Path       string `json:"path,omitempty"`
	Source     string `json:"source,omitempty"`
	BundlePath string `json:"bundle_path,omitempty"`
	Version    string `json:"version,omitempty"`
	// MakeActive is a *bool, unlike the plain bool on
	// ImportDatasetBundleRequest: it stays nil when the key is absent, so an
	// omitted value can be told apart from an explicit false.
	MakeActive *bool  `json:"make_active,omitempty"`
}

type ImportDatasetBundleRequest ¶

// ImportDatasetBundleRequest is the inline form of a dataset bundle import: a
// DatasetSpec, a DatasetVersionSpec and zero or more eval specs.
type ImportDatasetBundleRequest struct {
	SourceType string                        `json:"source_type,omitempty"`
	SourceRef  string                        `json:"source_ref,omitempty"`
	// Dataset and Version are struct values without omitempty and are always
	// encoded.
	Dataset    DatasetSpec                   `json:"dataset"`
	Version    DatasetVersionSpec            `json:"version"`
	EvalSpecs  []ImportDatasetBundleEvalSpec `json:"eval_specs,omitempty"`
	// MakeActive is a plain bool: an absent key decodes to false.
	MakeActive bool                          `json:"make_active,omitempty"`
}

type ImportDatasetBundleResult ¶

// ImportDatasetBundleResult bundles a Dataset and DatasetVersion (pointers,
// omitted when nil) with a slice of EvalSpec values (omitted when empty).
// Unlike GroupPromotionResult, which carries a single *EvalSpec under
// "eval_spec", this type carries a list under "eval_specs".
type ImportDatasetBundleResult struct {
	Dataset        *Dataset        `json:"dataset,omitempty"`
	DatasetVersion *DatasetVersion `json:"dataset_version,omitempty"`
	EvalSpecs      []EvalSpec      `json:"eval_specs,omitempty"`
}

type JudgeEvaluationRequest ¶

// JudgeEvaluationRequest is the argument passed by value to
// JudgeEvaluator.Evaluate. It carries no struct tags.
//
// Group, Item, Run and Verification are pointers.
// NOTE(review): which of them may be nil is not visible here —
// implementations should check before dereferencing.
type JudgeEvaluationRequest struct {
	Model        string
	Group        *RunGroup
	Item         *RunGroupItem
	Run          *Run
	Calibration  map[string]interface{}
	Verification *HarnessVerificationResult
}

type JudgeEvaluationResult ¶

// JudgeEvaluationResult is the value JudgeEvaluator.Evaluate returns (by
// pointer). It carries no struct tags.
// NOTE(review): the range of Score and its relation to Verdict are not
// visible from this declaration.
type JudgeEvaluationResult struct {
	Backend string
	Model   string
	Verdict ScoreVerdict
	Score   float64
	Reason  string
	Trace   map[string]interface{}
}

type JudgeEvaluator ¶

// JudgeEvaluator is a single-method interface for judging a run.
// *LLMJudgeEvaluator declares an Evaluate method with this exact signature.
type JudgeEvaluator interface {
	// Evaluate takes the request by value and returns a result pointer and an
	// error.
	Evaluate(ctx context.Context, req JudgeEvaluationRequest) (*JudgeEvaluationResult, error)
}

type LLMJudgeEvaluator ¶

// LLMJudgeEvaluator is an opaque type whose Evaluate method (pointer
// receiver) has the JudgeEvaluator signature.
//
// NewLLMJudgeEvaluator takes a parameter of the unexported type
// judgeLLMCaller, so code outside this package cannot name that type.
// NOTE(review): if judgeLLMCaller is an interface, external values that
// satisfy it can still be passed; otherwise the constructor is effectively
// package-internal — confirm.
type LLMJudgeEvaluator struct {
	// contains filtered or unexported fields
}

func NewLLMJudgeEvaluator ¶

func NewLLMJudgeEvaluator(llmCaller judgeLLMCaller) *LLMJudgeEvaluator

func (*LLMJudgeEvaluator) Evaluate ¶

func (e *LLMJudgeEvaluator) Evaluate(ctx context.Context, req JudgeEvaluationRequest) (*JudgeEvaluationResult, error)

type LegacyStoreMigrationResult ¶

// LegacyStoreMigrationResult describes a one-time import from the legacy
// standalone harness.db file into the shared blue.db store. It is returned
// (by pointer) from MigrateLegacyStore and carries no struct tags.
type LegacyStoreMigrationResult struct {
	SourcePath   string
	ArchivedPath string
	RowsImported int
}

LegacyStoreMigrationResult describes a one-time import from the legacy standalone harness.db file into the shared blue.db store.

func MigrateLegacyStore ¶

func MigrateLegacyStore(ctx context.Context, db *sql.DB, dataDir string) (*LegacyStoreMigrationResult, error)

MigrateLegacyStore imports rows from the legacy harness.db file into the shared SQLite database and archives the standalone file afterward.

type Manager ¶

// Manager is the run-management interface of the package. Going by the method
// signatures it covers:
//
//   - Submit: create a run from a RunSpec and return it.
//   - Get / List: fetch one run by id, or the runs matching a RunFilter.
//   - Cancel: cancel a run by id, with a caller-supplied reason string.
//   - PerformAction: apply a named action with a free-form input map to a run
//     and return the run.
//   - SpawnChild: create a run from a RunSpec under the run identified by
//     parentID.
//   - AppendEvent / AttachArtifact: record a RunEvent or an ArtifactRef.
//
// Every method takes a context.Context first. Error conditions (unknown id,
// invalid action, and so on) are implementation-defined and not visible here.
type Manager interface {
	Submit(ctx context.Context, spec RunSpec) (*Run, error)
	Get(ctx context.Context, id string) (*Run, error)
	List(ctx context.Context, filter RunFilter) ([]Run, error)
	Cancel(ctx context.Context, id string, reason string) error
	PerformAction(ctx context.Context, id string, action string, input map[string]interface{}) (*Run, error)
	SpawnChild(ctx context.Context, parentID string, spec RunSpec) (*Run, error)
	AppendEvent(ctx context.Context, event RunEvent) error
	AttachArtifact(ctx context.Context, ref ArtifactRef) error
}

type OptimizationReason ¶

// OptimizationReason is the string-backed reason code carried in
// OptimizationTrigger.Reason.
type OptimizationReason string

// Reason codes declared by this package. Each constant is a typed
// OptimizationReason whose value is the snake_case string that appears on the
// wire.
const (
	OptimizationReasonRuntimeSkillFailure = OptimizationReason("runtime_skill_failure")
	OptimizationReasonRuntimeSkillCapture = OptimizationReason("runtime_skill_capture")
	OptimizationReasonSelectorGateFailed  = OptimizationReason("selector_gate_failed")
	OptimizationReasonExecutionGateFailed = OptimizationReason("execution_gate_failed")
	OptimizationReasonBudgetGateFailed    = OptimizationReason("budget_gate_failed")
	OptimizationReasonCutoverBlocking     = OptimizationReason("cutover_readiness_blocking")
	OptimizationReasonManualSkillOptimize = OptimizationReason("manual_skill_optimize")
	OptimizationReasonSelectorGatePassed  = OptimizationReason("selector_gate_passed")
	OptimizationReasonExecutionGatePassed = OptimizationReason("execution_gate_passed")
	OptimizationReasonBudgetGatePassed    = OptimizationReason("budget_gate_passed")
)

type OptimizationSurface ¶

// OptimizationSurface is the string-backed surface identifier carried in
// OptimizationTrigger.OptimizationSurface.
type OptimizationSurface string

// Surface identifiers declared by this package.
const (
	OptimizationSurfaceConstraints     = OptimizationSurface("constraints")
	OptimizationSurfaceRunnerCode      = OptimizationSurface("runner_code")
	OptimizationSurfaceBuildRecipe     = OptimizationSurface("build_recipe")
	OptimizationSurfaceSkillDefinition = OptimizationSurface("skill_definition")
)

type OptimizationTrigger ¶

// OptimizationTrigger is the event handed to
// OptimizationTriggerer.TriggerOptimization. With default encoding/json
// behavior only Reason is always present in the output; the candidate and
// eval-run identifiers, the OptimizationRun flag, OptimizationSurface and
// Metadata are omitempty and vanish when zero or empty (so
// OptimizationRun=false is never written).
type OptimizationTrigger struct {
	Reason              OptimizationReason     `json:"reason"`
	CandidateID         string                 `json:"candidate_id,omitempty"`
	EvalRunID           string                 `json:"eval_run_id,omitempty"`
	BaseEvalRunID       string                 `json:"base_eval_run_id,omitempty"`
	OptimizationRun     bool                   `json:"optimization_run,omitempty"`
	OptimizationSurface OptimizationSurface    `json:"optimization_surface,omitempty"`
	Metadata            map[string]interface{} `json:"metadata,omitempty"`
}

type OptimizationTriggerer ¶

// OptimizationTriggerer receives OptimizationTrigger events. Its single
// method takes the event by value and reports failure through the returned
// error; what an implementation does with the event is not specified here.
type OptimizationTriggerer interface {
	TriggerOptimization(ctx context.Context, event OptimizationTrigger) error
}

type PolicyResolver ¶

// PolicyResolver is built by NewPolicyResolver from a config.HarnessConfig
// and a *config.AgentsConfig. Its methods return an artifact root for a run
// id (ArtifactRoot), report whether a path is protected relative to an
// artifact root (IsProtectedPath), and map a RunSpec to a RunSpec (Resolve,
// and ResolveChild for a spec under a parent Run, which can also fail).
type PolicyResolver struct {
	// contains filtered or unexported fields
}

func NewPolicyResolver ¶

func NewPolicyResolver(cfg config.HarnessConfig, agents *config.AgentsConfig) *PolicyResolver

func (*PolicyResolver) ArtifactRoot ¶

func (r *PolicyResolver) ArtifactRoot(runID string) string

func (*PolicyResolver) IsProtectedPath ¶

func (r *PolicyResolver) IsProtectedPath(path string, artifactRoot string) bool

func (*PolicyResolver) Resolve ¶

func (r *PolicyResolver) Resolve(spec RunSpec) RunSpec

func (*PolicyResolver) ResolveChild ¶

func (r *PolicyResolver) ResolveChild(parent *Run, spec RunSpec) (RunSpec, error)

type ProposalReflector ¶

// ProposalReflector turns a selfreflect.Input into a *selfreflect.Result, or
// returns an error. The selfreflect types are declared in another package and
// are not described in this excerpt.
type ProposalReflector interface {
	Reflect(ctx context.Context, input selfreflect.Input) (*selfreflect.Result, error)
}

type Run ¶

// Run is the record of a single harness run. Manager.Submit, Get,
// PerformAction and SpawnChild return *Run, and SQLiteStore exposes
// CreateRun, GetRun, UpdateRun and DeleteRun for it.
//
// Every RunSpec field has a same-named, same-typed counterpart here (Kind,
// Goal, UserID, ConversationID, SessionID, ParentRunID, GroupID, GroupItemID,
// AttemptIndex, AgentID, ProviderID, Model, WorkspaceRoot, SandboxMode,
// ApprovalMode, the Max* limits and Metadata). ID, RootRunID, Status,
// RuntimeState, Result, Error, Depth, CurrentStep, Progress, ArtifactRoot and
// the four timestamps exist only on Run.
//
// JSON notes (default encoding/json behavior): id, root_run_id, kind, status,
// goal, depth, current_step, progress, created_at and updated_at are always
// emitted; every other field is omitempty. MaxDuration is a time.Duration and
// is therefore written as an integer number of nanoseconds. StartedAt and
// FinishedAt are pointers, so an unset time is omitted instead of being
// encoded as the zero time.
type Run struct {
	ID             string                 `json:"id"`
	RootRunID      string                 `json:"root_run_id"`
	ParentRunID    string                 `json:"parent_run_id,omitempty"`
	GroupID        string                 `json:"group_id,omitempty"`
	GroupItemID    string                 `json:"group_item_id,omitempty"`
	AttemptIndex   int                    `json:"attempt_index,omitempty"`
	Kind           RunKind                `json:"kind"`
	Status         RunStatus              `json:"status"`
	RuntimeState   agentpkg.RuntimeState  `json:"runtime_state,omitempty"`
	UserID         string                 `json:"user_id,omitempty"`
	ConversationID string                 `json:"conversation_id,omitempty"`
	SessionID      string                 `json:"session_id,omitempty"`
	AgentID        string                 `json:"agent_id,omitempty"`
	Goal           string                 `json:"goal"`
	ProviderID     string                 `json:"provider_id,omitempty"`
	Model          string                 `json:"model,omitempty"`
	Result         string                 `json:"result,omitempty"`
	Error          string                 `json:"error,omitempty"`
	Depth          int                    `json:"depth"`
	CurrentStep    int                    `json:"current_step"`
	Progress       int                    `json:"progress"`
	WorkspaceRoot  string                 `json:"workspace_root,omitempty"`
	ArtifactRoot   string                 `json:"artifact_root,omitempty"`
	SandboxMode    string                 `json:"sandbox_mode,omitempty"`
	ApprovalMode   ApprovalMode           `json:"approval_mode,omitempty"`
	MaxDuration    time.Duration          `json:"max_duration,omitempty"`
	MaxSteps       int                    `json:"max_steps,omitempty"`
	MaxToolRounds  int                    `json:"max_tool_rounds,omitempty"`
	MaxSubagents   int                    `json:"max_subagents,omitempty"`
	MaxDepth       int                    `json:"max_depth,omitempty"`
	Metadata       map[string]interface{} `json:"metadata,omitempty"`
	CreatedAt      time.Time              `json:"created_at"`
	UpdatedAt      time.Time              `json:"updated_at"`
	StartedAt      *time.Time             `json:"started_at,omitempty"`
	FinishedAt     *time.Time             `json:"finished_at,omitempty"`
}

type RunActionAvailability ¶

// RunActionAvailability wraps a list of ActionDescriptor values and is the
// type of RunDetail.Actions. Items is omitempty, so an empty list encodes as
// an empty JSON object.
type RunActionAvailability struct {
	Items []ActionDescriptor `json:"items,omitempty"`
}

type RunContext ¶

// RunContext bundles the values scoped to one run: the Run itself, its Parent
// run, the Driver, the Controller and a free-form Values map. It is placed on
// a context.Context with WithRunContext and read back with GetRunContext.
//
// NOTE(review): which fields may be nil (for example Parent on a run without
// a parent) is not established by this declaration — confirm before
// dereferencing.
type RunContext struct {
	Run        *Run
	Parent     *Run
	Driver     Driver
	Controller *Controller
	Values     map[string]interface{}
}

RunContext captures the run-scoped execution envelope shared by controller, middlewares, and drivers during dispatch.

func GetRunContext ¶

func GetRunContext(ctx context.Context) *RunContext

GetRunContext returns the run-scoped execution envelope carried on the context.

type RunDetail ¶

// RunDetail is an aggregate view of one run: the Run, its available actions,
// its events and artifacts, an optional RunTrace, and any pending approvals
// and questions (the same []map[string]interface{} shape that
// RunDetailProvider returns).
//
// JSON notes (default encoding/json behavior): run, actions, events and
// artifacts have no omitempty, so a nil Run, Events or Artifacts is written
// as null rather than dropped. run_trace, pending_approvals and
// pending_questions are omitted when nil/empty.
type RunDetail struct {
	Run              *Run                     `json:"run"`
	Actions          RunActionAvailability    `json:"actions"`
	Events           []RunEvent               `json:"events"`
	Artifacts        []ArtifactRef            `json:"artifacts"`
	RunTrace         *RunTrace                `json:"run_trace,omitempty"`
	PendingApprovals []map[string]interface{} `json:"pending_approvals,omitempty"`
	PendingQuestions []map[string]interface{} `json:"pending_questions,omitempty"`
}

type RunDetailProvider ¶

// RunDetailProvider supplies the pending approvals and pending questions for
// a run id as slices of free-form maps, matching the element type of
// RunDetail.PendingApprovals and RunDetail.PendingQuestions. Neither method
// takes a context or returns an error; the map schema is not defined here.
type RunDetailProvider interface {
	PendingApprovals(runID string) []map[string]interface{}
	PendingQuestions(runID string) []map[string]interface{}
}

type RunEnv ¶

// RunEnv pairs a *Controller with a *RunContext. Note that the field named
// Manager holds the concrete *Controller type, not the Manager interface.
type RunEnv struct {
	Manager    *Controller
	RunContext *RunContext
}

type RunEvent ¶

// RunEvent is one event recorded against a run. It is accepted by
// Manager.AppendEvent and SQLiteStore.AppendEvent, returned by
// SQLiteStore.ListEvents, and listed in RunDetail.Events.
//
// JSON notes (default encoding/json behavior): id, run_id, type and
// created_at are always emitted; the remaining fields are omitempty, so
// StepIndex 0 is not written. PayloadJSON is a plain string field; judging by
// its name it holds a serialized JSON document, but nothing in this
// declaration enforces that.
type RunEvent struct {
	ID             string    `json:"id"`
	RunID          string    `json:"run_id"`
	RootRunID      string    `json:"root_run_id,omitempty"`
	ParentRunID    string    `json:"parent_run_id,omitempty"`
	Type           string    `json:"type"`
	StepIndex      int       `json:"step_index,omitempty"`
	ToolName       string    `json:"tool_name,omitempty"`
	CapabilityKind string    `json:"capability_kind,omitempty"`
	Message        string    `json:"message,omitempty"`
	PayloadJSON    string    `json:"payload_json,omitempty"`
	CreatedAt      time.Time `json:"created_at"`
}

type RunFilter ¶

// RunFilter is the query argument of Manager.List and SQLiteStore.ListRuns.
// It can constrain runs by user, kind(s), statuses, conversation, group,
// group item, parent run and root run, and carries a Limit. The struct
// declares no JSON tags.
//
// NOTE(review): both Kind and Kinds are present; how they combine, how zero
// values are treated and what Limit <= 0 means are decided by the store
// implementation, which is not shown here — confirm before relying on it.
type RunFilter struct {
	UserID         string
	Kind           RunKind
	Kinds          []RunKind
	Statuses       []RunStatus
	ConversationID string
	GroupID        string
	GroupItemID    string
	ParentRunID    string
	RootRunID      string
	Limit          int
}

type RunGroup ¶

// RunGroup is the persisted record of a group of runs. SQLiteStore exposes
// CreateGroup, GetGroup, UpdateGroup and ListGroups for it, and it appears in
// JudgeEvaluationRequest.Group and RunGroupReport.Group.
//
// JSON notes (default encoding/json behavior): id, kind, status, created_at
// and updated_at are always emitted; the other fields are tagged omitempty.
//
// NOTE(review): if GroupSchedulerConfig and GroupScoringConfig are struct
// types (their declarations are outside this excerpt), omitempty has no
// effect on them and they are always encoded — confirm. Also note the keys
// here are "scheduler_config"/"scoring_config", whereas RunGroupSpec uses
// "scheduler"/"scoring" for the same Go field names.
type RunGroup struct {
	ID              string                 `json:"id"`
	Kind            RunGroupKind           `json:"kind"`
	Title           string                 `json:"title,omitempty"`
	Status          RunGroupStatus         `json:"status"`
	OwnerUserID     string                 `json:"owner_user_id,omitempty"`
	Subject         string                 `json:"subject,omitempty"`
	SchedulerConfig GroupSchedulerConfig   `json:"scheduler_config,omitempty"`
	ScoringConfig   GroupScoringConfig     `json:"scoring_config,omitempty"`
	Metadata        map[string]interface{} `json:"metadata,omitempty"`
	Summary         map[string]interface{} `json:"summary,omitempty"`
	CreatedAt       time.Time              `json:"created_at"`
	UpdatedAt       time.Time              `json:"updated_at"`
	StartedAt       *time.Time             `json:"started_at,omitempty"`
	FinishedAt      *time.Time             `json:"finished_at,omitempty"`
}

type RunGroupFilter ¶

// RunGroupFilter is the query argument of SQLiteStore.ListGroups. It can
// constrain groups by owner, kinds and statuses, and carries a Limit. The
// struct declares no JSON tags.
//
// NOTE(review): the treatment of empty slices and of Limit <= 0 is decided by
// the store implementation, which is not shown here — confirm.
type RunGroupFilter struct {
	OwnerUserID string
	Kinds       []RunGroupKind
	Statuses    []RunGroupStatus
	Limit       int
}

type RunGroupItem ¶

// RunGroupItem is the persisted record of one item inside a RunGroup.
// SQLiteStore exposes CreateGroupItems, GetGroupItem, UpdateGroupItem,
// ListGroupItems and ClaimNextGroupItem for it. RunKind, Profile, Input,
// Expected and Metadata have the same names and types as in RunGroupItemSpec.
//
// JSON notes (default encoding/json behavior): id, group_id, index, run_kind,
// status, created_at and updated_at are always emitted; the rest is
// omitempty.
//
// NOTE(review): LeaseOwner and LeaseExpiresAt presumably record the workerID
// and lease TTL passed to SQLiteStore.ClaimNextGroupItem — confirm against
// the store implementation.
type RunGroupItem struct {
	ID             string                 `json:"id"`
	GroupID        string                 `json:"group_id"`
	Index          int                    `json:"index"`
	RunKind        RunKind                `json:"run_kind"`
	Profile        string                 `json:"profile,omitempty"`
	Input          map[string]interface{} `json:"input,omitempty"`
	Expected       map[string]interface{} `json:"expected,omitempty"`
	Metadata       map[string]interface{} `json:"metadata,omitempty"`
	Status         RunGroupItemStatus     `json:"status"`
	LatestRunID    string                 `json:"latest_run_id,omitempty"`
	AttemptCount   int                    `json:"attempt_count,omitempty"`
	MaxAttempts    int                    `json:"max_attempts,omitempty"`
	LeaseOwner     string                 `json:"lease_owner,omitempty"`
	LeaseExpiresAt *time.Time             `json:"lease_expires_at,omitempty"`
	CreatedAt      time.Time              `json:"created_at"`
	UpdatedAt      time.Time              `json:"updated_at"`
}

type RunGroupItemSpec ¶

// RunGroupItemSpec is the element type of RunGroupSpec.Items. Its five fields
// have the same names, types and JSON keys as the corresponding fields of
// RunGroupItem. Only run_kind is always emitted; the rest is omitempty.
type RunGroupItemSpec struct {
	RunKind  RunKind                `json:"run_kind"`
	Profile  string                 `json:"profile,omitempty"`
	Input    map[string]interface{} `json:"input,omitempty"`
	Expected map[string]interface{} `json:"expected,omitempty"`
	Metadata map[string]interface{} `json:"metadata,omitempty"`
}

type RunGroupItemStatus ¶

// RunGroupItemStatus is the string-backed status stored in
// RunGroupItem.Status.
type RunGroupItemStatus string

// Item statuses declared by this package.
const (
	RunGroupItemStatusPending   = RunGroupItemStatus("pending")
	RunGroupItemStatusQueued    = RunGroupItemStatus("queued")
	RunGroupItemStatusRunning   = RunGroupItemStatus("running")
	RunGroupItemStatusScoring   = RunGroupItemStatus("scoring")
	RunGroupItemStatusPassed    = RunGroupItemStatus("passed")
	RunGroupItemStatusFailed    = RunGroupItemStatus("failed")
	RunGroupItemStatusError     = RunGroupItemStatus("error")
	RunGroupItemStatusCancelled = RunGroupItemStatus("cancelled")
)

type RunGroupKind ¶

// RunGroupKind is the string-backed kind stored in RunGroup.Kind and
// RunGroupSpec.Kind.
type RunGroupKind string

// Group kinds declared by this package.
const (
	RunGroupKindEval       = RunGroupKind("eval")
	RunGroupKindExperiment = RunGroupKind("experiment")
	RunGroupKindBatch      = RunGroupKind("batch")
)

type RunGroupReport ¶

// RunGroupReport is an aggregate report for one RunGroup: the group, its
// items, verdict counts, an overall score and pass rate, a free-form
// breakdown, failed items, linked runs, artifacts, scorecards, runtime
// evidence and traces held in string-keyed maps, per-item HarnessContract
// values, and checkpoints.
//
// JSON notes (default encoding/json behavior): only group is always emitted
// (null when the pointer is nil). Everything else is omitempty, which means
// an OverallScore or PassRate of exactly 0 is dropped from the output rather
// than written as 0.
//
// NOTE(review): the key space of RuntimeEvidence, RuntimeTraces and
// ItemContracts (run id vs. group item id) is not visible in this excerpt —
// confirm against the code that builds the report.
type RunGroupReport struct {
	Group           *RunGroup                         `json:"group"`
	Items           []RunGroupItem                    `json:"items,omitempty"`
	VerdictCounts   map[string]int                    `json:"verdict_counts,omitempty"`
	OverallScore    float64                           `json:"overall_score,omitempty"`
	PassRate        float64                           `json:"pass_rate,omitempty"`
	Breakdown       map[string]interface{}            `json:"breakdown,omitempty"`
	FailedItems     []map[string]interface{}          `json:"failed_items,omitempty"`
	LinkedRuns      []Run                             `json:"linked_runs,omitempty"`
	Artifacts       []ArtifactRef                     `json:"artifacts,omitempty"`
	Scorecards      []Scorecard                       `json:"scorecards,omitempty"`
	RuntimeEvidence map[string][]RuntimeEvidenceEntry `json:"runtime_evidence,omitempty"`
	RuntimeTraces   map[string]RunTrace               `json:"runtime_traces,omitempty"`
	ItemContracts   map[string]HarnessContract        `json:"item_contracts,omitempty"`
	Checkpoints     []CheckpointArtifact              `json:"checkpoints,omitempty"`
}

type RunGroupSpec ¶

// RunGroupSpec describes a run group to be created: its kind, title, subject,
// owner, metadata, scheduler and scoring configuration, and the item specs.
//
// JSON notes (default encoding/json behavior): kind, scheduler, scoring and
// items are always emitted (a nil Items slice is written as null); title,
// subject, owner_user_id and metadata are omitempty.
//
// NOTE(review): SchedulerConfig and ScoringConfig use the keys "scheduler"
// and "scoring" here, while RunGroup uses "scheduler_config" and
// "scoring_config" for the same Go field names. A RunGroup JSON document
// therefore does not round-trip into a RunGroupSpec for those two fields.
type RunGroupSpec struct {
	Kind            RunGroupKind           `json:"kind"`
	Title           string                 `json:"title,omitempty"`
	Subject         string                 `json:"subject,omitempty"`
	OwnerUserID     string                 `json:"owner_user_id,omitempty"`
	Metadata        map[string]interface{} `json:"metadata,omitempty"`
	SchedulerConfig GroupSchedulerConfig   `json:"scheduler"`
	ScoringConfig   GroupScoringConfig     `json:"scoring"`
	Items           []RunGroupItemSpec     `json:"items"`
}

type RunGroupStatus ¶

// RunGroupStatus is the string-backed status stored in RunGroup.Status.
type RunGroupStatus string

// Group statuses declared by this package.
const (
	RunGroupStatusPending   = RunGroupStatus("pending")
	RunGroupStatusQueued    = RunGroupStatus("queued")
	RunGroupStatusRunning   = RunGroupStatus("running")
	RunGroupStatusScoring   = RunGroupStatus("scoring")
	RunGroupStatusCompleted = RunGroupStatus("completed")
	RunGroupStatusPartial   = RunGroupStatus("partial")
	RunGroupStatusFailed    = RunGroupStatus("failed")
	RunGroupStatusCancelled = RunGroupStatus("cancelled")
)

type RunKind ¶

// RunKind is the string-backed kind of a run, used by Run.Kind, RunSpec.Kind,
// RunFilter and the RunKind field of group items.
type RunKind string

// Run kinds declared by this package.
const (
	RunKindAgentTask = RunKind("agent_task")
	RunKindResearch  = RunKind("research")
	RunKindSubagent  = RunKind("subagent")
	RunKindWorkflow  = RunKind("workflow")
)

type RunSpec ¶

// RunSpec is the request to create a run. It is the argument of
// Manager.Submit and Manager.SpawnChild, and PolicyResolver.Resolve and
// ResolveChild both take and return a RunSpec. Every field has a same-named,
// same-typed counterpart on Run.
//
// JSON notes (default encoding/json behavior): only kind and goal are always
// emitted; all other fields are omitempty. MaxDuration is a time.Duration and
// is therefore read and written as an integer number of nanoseconds, not as a
// string such as "30s".
//
// NOTE(review): how zero-valued limits (MaxSteps, MaxToolRounds,
// MaxSubagents, MaxDepth, MaxDuration) are interpreted — unlimited vs.
// "use the configured default" — is decided by code outside this excerpt.
type RunSpec struct {
	Kind           RunKind                `json:"kind"`
	Goal           string                 `json:"goal"`
	UserID         string                 `json:"user_id,omitempty"`
	ConversationID string                 `json:"conversation_id,omitempty"`
	SessionID      string                 `json:"session_id,omitempty"`
	ParentRunID    string                 `json:"parent_run_id,omitempty"`
	GroupID        string                 `json:"group_id,omitempty"`
	GroupItemID    string                 `json:"group_item_id,omitempty"`
	AttemptIndex   int                    `json:"attempt_index,omitempty"`
	AgentID        string                 `json:"agent_id,omitempty"`
	ProviderID     string                 `json:"provider_id,omitempty"`
	Model          string                 `json:"model,omitempty"`
	WorkspaceRoot  string                 `json:"workspace_root,omitempty"`
	SandboxMode    string                 `json:"sandbox_mode,omitempty"`
	ApprovalMode   ApprovalMode           `json:"approval_mode,omitempty"`
	MaxDuration    time.Duration          `json:"max_duration,omitempty"`
	MaxSteps       int                    `json:"max_steps,omitempty"`
	MaxToolRounds  int                    `json:"max_tool_rounds,omitempty"`
	MaxSubagents   int                    `json:"max_subagents,omitempty"`
	MaxDepth       int                    `json:"max_depth,omitempty"`
	Metadata       map[string]interface{} `json:"metadata,omitempty"`
}

type RunStatus ¶

// RunStatus is the string-backed status stored in Run.Status and
// RunTrace.Status, and accepted by RunFilter.Statuses.
type RunStatus string

// Run statuses declared by this package.
const (
	RunStatusPending      = RunStatus("pending")
	RunStatusPlanning     = RunStatus("planning")
	RunStatusWaitingInput = RunStatus("waiting_input")
	RunStatusExecuting    = RunStatus("executing")
	RunStatusVerifying    = RunStatus("verifying")
	RunStatusCompleted    = RunStatus("completed")
	RunStatusFailed       = RunStatus("failed")
	RunStatusCancelled    = RunStatus("cancelled")
	RunStatusAborted      = RunStatus("aborted")
)

type RunTrace ¶

// RunTrace is a trace view of one run, returned by RunTraceProvider.Snapshot
// and RunTraceCollector.Snapshot and embedded in RunDetail.RunTrace and
// RunGroupReport.RuntimeTraces. It identifies the run (RunID, RootRunID,
// ParentRunID, Kind, Status), records start/finish times and a latency in
// milliseconds, and lists stages, events and artifacts.
//
// JSON notes (default encoding/json behavior): only run_id is always emitted;
// all other fields are omitempty, so a LatencyMs of 0 is not written.
type RunTrace struct {
	RunID       string          `json:"run_id"`
	RootRunID   string          `json:"root_run_id,omitempty"`
	ParentRunID string          `json:"parent_run_id,omitempty"`
	Kind        RunKind         `json:"kind,omitempty"`
	Status      RunStatus       `json:"status,omitempty"`
	StartedAt   *time.Time      `json:"started_at,omitempty"`
	FinishedAt  *time.Time      `json:"finished_at,omitempty"`
	LatencyMs   int64           `json:"latency_ms,omitempty"`
	Stages      []RunTraceStage `json:"stages,omitempty"`
	Events      []RunTraceEvent `json:"events,omitempty"`
	Artifacts   []ArtifactRef   `json:"artifacts,omitempty"`
}

type RunTraceCollector ¶

// RunTraceCollector is built by NewRunTraceCollector from a *Controller. It
// exposes Middleware, which returns an ExecutionMiddleware, and Snapshot,
// whose signature matches RunTraceProvider.Snapshot.
type RunTraceCollector struct {
	// contains filtered or unexported fields
}

func NewRunTraceCollector ¶

func NewRunTraceCollector(manager *Controller) *RunTraceCollector

func (*RunTraceCollector) Middleware ¶

func (c *RunTraceCollector) Middleware() ExecutionMiddleware

func (*RunTraceCollector) Snapshot ¶

func (c *RunTraceCollector) Snapshot(ctx context.Context, runID string) (*RunTrace, error)

type RunTraceEvent ¶

// RunTraceEvent is the element type of RunTrace.Events. Its fields (Type,
// Message, StepIndex, ToolName, CapabilityKind, CreatedAt) are a subset of
// RunEvent's, with the same names, types and JSON keys; identifiers and the
// payload are not included. type and created_at are always emitted, the rest
// is omitempty.
type RunTraceEvent struct {
	Type           string    `json:"type"`
	Message        string    `json:"message,omitempty"`
	StepIndex      int       `json:"step_index,omitempty"`
	ToolName       string    `json:"tool_name,omitempty"`
	CapabilityKind string    `json:"capability_kind,omitempty"`
	CreatedAt      time.Time `json:"created_at"`
}

type RunTraceProvider ¶

// RunTraceProvider returns the RunTrace for a run id, or an error.
// *RunTraceCollector declares a Snapshot method with this exact signature.
type RunTraceProvider interface {
	Snapshot(ctx context.Context, runID string) (*RunTrace, error)
}

type RunTraceStage ¶

// RunTraceStage is the element type of RunTrace.Stages. Stage has type
// RuntimeStage; Status is a plain string whose allowed values are not defined
// in this excerpt. stage and created_at are always emitted, while message,
// status and details are omitempty.
type RunTraceStage struct {
	Stage     RuntimeStage           `json:"stage"`
	Message   string                 `json:"message,omitempty"`
	Status    string                 `json:"status,omitempty"`
	Details   map[string]interface{} `json:"details,omitempty"`
	CreatedAt time.Time              `json:"created_at"`
}

type RuntimeEvidenceEntry ¶

// RuntimeEvidenceEntry is one evidence record for a run. It is returned by
// RuntimeEvidenceProvider.ListRuntimeEvidence and stored in
// RunGroupReport.RuntimeEvidence. id, run_id, event_type and created_at are
// always emitted; step_index, planner_round, summary and payload_json are
// omitempty. PayloadJSON is a plain string; nothing in this declaration
// validates that it contains JSON.
type RuntimeEvidenceEntry struct {
	ID           string    `json:"id"`
	RunID        string    `json:"run_id"`
	StepIndex    int       `json:"step_index,omitempty"`
	PlannerRound int       `json:"planner_round,omitempty"`
	EventType    string    `json:"event_type"`
	Summary      string    `json:"summary,omitempty"`
	PayloadJSON  string    `json:"payload_json,omitempty"`
	CreatedAt    time.Time `json:"created_at"`
}

type RuntimeEvidenceProvider ¶

// RuntimeEvidenceProvider lists the RuntimeEvidenceEntry values for a run.
// Unlike the id-based lookups elsewhere in this package, it takes the *Run
// itself; behavior for a nil run is implementation-defined.
type RuntimeEvidenceProvider interface {
	ListRuntimeEvidence(ctx context.Context, run *Run) ([]RuntimeEvidenceEntry, error)
}

type RuntimeObserver ¶

// RuntimeObserver is built by NewRuntimeObserver from a *Controller. It
// declares six callbacks — OnApprovalRequested/OnApprovalResolved,
// OnQuestionRequested/OnQuestionResolved and OnToolRequested/OnToolFinished —
// taking the corresponding tools.*RuntimeEvent values. None of them returns a
// value.
//
// NOTE(review): this method set looks like it is meant to satisfy
// tools.RuntimeEventObserver, but that interface is declared in another
// package — confirm.
type RuntimeObserver struct {
	// contains filtered or unexported fields
}

func NewRuntimeObserver ¶

func NewRuntimeObserver(manager *Controller) *RuntimeObserver

func (*RuntimeObserver) OnApprovalRequested ¶

func (o *RuntimeObserver) OnApprovalRequested(event tools.ApprovalRuntimeEvent)

func (*RuntimeObserver) OnApprovalResolved ¶

func (o *RuntimeObserver) OnApprovalResolved(event tools.ApprovalRuntimeEvent)

func (*RuntimeObserver) OnQuestionRequested ¶

func (o *RuntimeObserver) OnQuestionRequested(event tools.QuestionRuntimeEvent)

func (*RuntimeObserver) OnQuestionResolved ¶

func (o *RuntimeObserver) OnQuestionResolved(event tools.QuestionRuntimeEvent)

func (*RuntimeObserver) OnToolFinished ¶

func (o *RuntimeObserver) OnToolFinished(event tools.ToolRuntimeEvent)

func (*RuntimeObserver) OnToolRequested ¶

func (o *RuntimeObserver) OnToolRequested(event tools.ToolRuntimeEvent)

type RuntimeReflectionCoordinator ¶

// RuntimeReflectionCoordinator is built by NewRuntimeReflectionCoordinator
// from a *Controller and a config.RuntimeReflectionConfig. It declares the
// same six approval/question/tool callbacks as RuntimeObserver, plus
// OnRunTerminal, which receives a context and the *Run. None of the callbacks
// returns a value.
type RuntimeReflectionCoordinator struct {
	// contains filtered or unexported fields
}

func NewRuntimeReflectionCoordinator ¶

func NewRuntimeReflectionCoordinator(controller *Controller, cfg config.RuntimeReflectionConfig) *RuntimeReflectionCoordinator

func (*RuntimeReflectionCoordinator) OnApprovalRequested ¶

func (c *RuntimeReflectionCoordinator) OnApprovalRequested(event tools.ApprovalRuntimeEvent)

func (*RuntimeReflectionCoordinator) OnApprovalResolved ¶

func (c *RuntimeReflectionCoordinator) OnApprovalResolved(event tools.ApprovalRuntimeEvent)

func (*RuntimeReflectionCoordinator) OnQuestionRequested ¶

func (c *RuntimeReflectionCoordinator) OnQuestionRequested(event tools.QuestionRuntimeEvent)

func (*RuntimeReflectionCoordinator) OnQuestionResolved ¶

func (c *RuntimeReflectionCoordinator) OnQuestionResolved(event tools.QuestionRuntimeEvent)

func (*RuntimeReflectionCoordinator) OnRunTerminal ¶

func (c *RuntimeReflectionCoordinator) OnRunTerminal(ctx context.Context, run *Run)

func (*RuntimeReflectionCoordinator) OnToolFinished ¶

func (c *RuntimeReflectionCoordinator) OnToolFinished(event tools.ToolRuntimeEvent)

func (*RuntimeReflectionCoordinator) OnToolRequested ¶

func (c *RuntimeReflectionCoordinator) OnToolRequested(event tools.ToolRuntimeEvent)

type RuntimeStage ¶

// RuntimeStage is the string-backed stage name stored in RunTraceStage.Stage.
type RuntimeStage string

// Runtime stages declared by this package.
const (
	RuntimeStageNormalize = RuntimeStage("normalize")
	RuntimeStagePolicy    = RuntimeStage("policy")
	RuntimeStageApproval  = RuntimeStage("approval")
	RuntimeStageExecute   = RuntimeStage("execute")
	RuntimeStageFinalize  = RuntimeStage("finalize")
)

type SQLiteStore ¶

// SQLiteStore is the database/sql-backed store of the package. It is built by
// NewSQLiteStore from a single *sql.DB, or by NewSQLiteStoreWithReadDB from
// separate write and read handles. Its methods cover runs, run events,
// artifacts, run groups and group items, scorecards, datasets and dataset
// versions, eval specs and eval runs, baselines, comparison reports, and
// skill revisions / evolution cases; all of them take a context.Context
// first.
type SQLiteStore struct {
	// contains filtered or unexported fields
}

func NewSQLiteStore ¶

func NewSQLiteStore(db *sql.DB) (*SQLiteStore, error)

func NewSQLiteStoreWithReadDB ¶

func NewSQLiteStoreWithReadDB(writeDB, readDB *sql.DB) (*SQLiteStore, error)

func (*SQLiteStore) AppendEvent ¶

func (s *SQLiteStore) AppendEvent(ctx context.Context, event RunEvent) error

func (*SQLiteStore) AttachArtifact ¶

func (s *SQLiteStore) AttachArtifact(ctx context.Context, ref ArtifactRef) error

func (*SQLiteStore) AttachScorecard ¶

func (s *SQLiteStore) AttachScorecard(ctx context.Context, scorecard Scorecard) error

func (*SQLiteStore) BuildEvolutionOverview ¶

func (s *SQLiteStore) BuildEvolutionOverview(
	ctx context.Context,
	skillID string,
	_ string,
) (*EvolutionOverview, error)

func (*SQLiteStore) ClaimNextGroupItem ¶

func (s *SQLiteStore) ClaimNextGroupItem(ctx context.Context, groupID, workerID string, leaseTTL time.Duration, now time.Time) (*RunGroupItem, error)

func (*SQLiteStore) ClearDefaultBaseline ¶

func (s *SQLiteStore) ClearDefaultBaseline(ctx context.Context, evalSpecID string) error

func (*SQLiteStore) CountGroupItemsByStatuses ¶

func (s *SQLiteStore) CountGroupItemsByStatuses(ctx context.Context, groupID string, statuses []RunGroupItemStatus) (int, error)

func (*SQLiteStore) CreateBaseline ¶

func (s *SQLiteStore) CreateBaseline(ctx context.Context, baseline *Baseline) error

func (*SQLiteStore) CreateComparisonReport ¶

func (s *SQLiteStore) CreateComparisonReport(ctx context.Context, report *ComparisonReport) error

func (*SQLiteStore) CreateDataset ¶

func (s *SQLiteStore) CreateDataset(ctx context.Context, dataset *Dataset) error

func (*SQLiteStore) CreateDatasetVersion ¶

func (s *SQLiteStore) CreateDatasetVersion(ctx context.Context, version *DatasetVersion) error

func (*SQLiteStore) CreateEvalRun ¶

func (s *SQLiteStore) CreateEvalRun(ctx context.Context, evalRun *EvalRun) error

func (*SQLiteStore) CreateEvalSpec ¶

func (s *SQLiteStore) CreateEvalSpec(ctx context.Context, spec *EvalSpec) error

func (*SQLiteStore) CreateGroup ¶

func (s *SQLiteStore) CreateGroup(ctx context.Context, group *RunGroup) error

func (*SQLiteStore) CreateGroupItems ¶

func (s *SQLiteStore) CreateGroupItems(ctx context.Context, items []RunGroupItem) error

func (*SQLiteStore) CreateRun ¶

func (s *SQLiteStore) CreateRun(ctx context.Context, run *Run) error

func (*SQLiteStore) CreateSkillEvolutionCase ¶

func (s *SQLiteStore) CreateSkillEvolutionCase(ctx context.Context, evolutionCase *SkillEvolutionCase) error

func (*SQLiteStore) CreateSkillRevision ¶

func (s *SQLiteStore) CreateSkillRevision(ctx context.Context, revision *SkillRevision) error

func (*SQLiteStore) DeleteRun ¶

func (s *SQLiteStore) DeleteRun(ctx context.Context, id string) error

func (*SQLiteStore) FindDatasetByOwnerAndName ¶

func (s *SQLiteStore) FindDatasetByOwnerAndName(ctx context.Context, ownerUserID, name string) (*Dataset, error)

func (*SQLiteStore) FindDatasetVersionByDatasetAndVersion ¶

func (s *SQLiteStore) FindDatasetVersionByDatasetAndVersion(ctx context.Context, datasetID, version string) (*DatasetVersion, error)

func (*SQLiteStore) FindEvalSpecByOwnerDatasetAndName ¶

func (s *SQLiteStore) FindEvalSpecByOwnerDatasetAndName(ctx context.Context, ownerUserID, datasetID, name string) (*EvalSpec, error)

func (*SQLiteStore) FindLatestSkillEvolutionCaseByDedupKey ¶

func (s *SQLiteStore) FindLatestSkillEvolutionCaseByDedupKey(ctx context.Context, skillID string, ownerUserID string, dedupKey string) (*SkillEvolutionCase, error)

func (*SQLiteStore) FindRunByMetadata ¶

func (s *SQLiteStore) FindRunByMetadata(ctx context.Context, kind RunKind, key, value string) (*Run, error)

func (*SQLiteStore) GetBaseline ¶

func (s *SQLiteStore) GetBaseline(ctx context.Context, id string) (*Baseline, error)

func (*SQLiteStore) GetComparisonReport ¶

func (s *SQLiteStore) GetComparisonReport(ctx context.Context, id string) (*ComparisonReport, error)

func (*SQLiteStore) GetDataset ¶

func (s *SQLiteStore) GetDataset(ctx context.Context, id string) (*Dataset, error)

func (*SQLiteStore) GetDatasetVersion ¶

func (s *SQLiteStore) GetDatasetVersion(ctx context.Context, id string) (*DatasetVersion, error)

func (*SQLiteStore) GetEvalRun ¶

func (s *SQLiteStore) GetEvalRun(ctx context.Context, id string) (*EvalRun, error)

func (*SQLiteStore) GetEvalSpec ¶

func (s *SQLiteStore) GetEvalSpec(ctx context.Context, id string) (*EvalSpec, error)

func (*SQLiteStore) GetGroup ¶

func (s *SQLiteStore) GetGroup(ctx context.Context, id string) (*RunGroup, error)

func (*SQLiteStore) GetGroupItem ¶

func (s *SQLiteStore) GetGroupItem(ctx context.Context, id string) (*RunGroupItem, error)

func (*SQLiteStore) GetRun ¶

func (s *SQLiteStore) GetRun(ctx context.Context, id string) (*Run, error)

func (*SQLiteStore) GetSkillEvolutionCase ¶

func (s *SQLiteStore) GetSkillEvolutionCase(ctx context.Context, id string) (*SkillEvolutionCase, error)

func (*SQLiteStore) GetSkillRevision ¶

func (s *SQLiteStore) GetSkillRevision(ctx context.Context, id string) (*SkillRevision, error)

func (*SQLiteStore) LatestScorecardForItem ¶

func (s *SQLiteStore) LatestScorecardForItem(ctx context.Context, groupItemID string) (*Scorecard, error)

func (*SQLiteStore) ListArtifacts ¶

func (s *SQLiteStore) ListArtifacts(ctx context.Context, runID string) ([]ArtifactRef, error)

func (*SQLiteStore) ListBaselines ¶

func (s *SQLiteStore) ListBaselines(ctx context.Context, filter BaselineFilter) ([]Baseline, error)

func (*SQLiteStore) ListDatasetVersions ¶

func (s *SQLiteStore) ListDatasetVersions(ctx context.Context, datasetID string, limit int) ([]DatasetVersion, error)

func (*SQLiteStore) ListDatasets ¶

func (s *SQLiteStore) ListDatasets(ctx context.Context, filter DatasetFilter) ([]Dataset, error)

func (*SQLiteStore) ListEvalRuns ¶

func (s *SQLiteStore) ListEvalRuns(ctx context.Context, filter EvalRunFilter) ([]EvalRun, error)

func (*SQLiteStore) ListEvalSpecs ¶

func (s *SQLiteStore) ListEvalSpecs(ctx context.Context, filter EvalSpecFilter) ([]EvalSpec, error)

func (*SQLiteStore) ListEvents ¶

func (s *SQLiteStore) ListEvents(ctx context.Context, runID string, limit int) ([]RunEvent, error)

func (*SQLiteStore) ListGroupItems ¶

func (s *SQLiteStore) ListGroupItems(ctx context.Context, groupID string) ([]RunGroupItem, error)

func (*SQLiteStore) ListGroups ¶

func (s *SQLiteStore) ListGroups(ctx context.Context, filter RunGroupFilter) ([]RunGroup, error)

func (*SQLiteStore) ListRuns ¶

func (s *SQLiteStore) ListRuns(ctx context.Context, filter RunFilter) ([]Run, error)

func (*SQLiteStore) ListScorecards ¶

func (s *SQLiteStore) ListScorecards(ctx context.Context, groupID string) ([]Scorecard, error)

func (*SQLiteStore) ListSkillDecisionHistory ¶

func (s *SQLiteStore) ListSkillDecisionHistory(ctx context.Context, filter SkillDecisionHistoryFilter) ([]SkillDecisionHistoryEntry, error)

func (*SQLiteStore) ListSkillEvolutionCases ¶

func (s *SQLiteStore) ListSkillEvolutionCases(ctx context.Context, filter SkillEvolutionCaseFilter) ([]SkillEvolutionCase, error)

func (*SQLiteStore) ListSkillRevisions ¶

func (s *SQLiteStore) ListSkillRevisions(ctx context.Context, filter SkillRevisionFilter) ([]SkillRevision, error)

func (*SQLiteStore) UpdateDataset ¶

func (s *SQLiteStore) UpdateDataset(ctx context.Context, dataset *Dataset) error

func (*SQLiteStore) UpdateEvalRun ¶

func (s *SQLiteStore) UpdateEvalRun(ctx context.Context, evalRun *EvalRun) error

func (*SQLiteStore) UpdateEvalSpec ¶

func (s *SQLiteStore) UpdateEvalSpec(ctx context.Context, spec *EvalSpec) error

func (*SQLiteStore) UpdateGroup ¶

func (s *SQLiteStore) UpdateGroup(ctx context.Context, group *RunGroup) error

func (*SQLiteStore) UpdateGroupItem ¶

func (s *SQLiteStore) UpdateGroupItem(ctx context.Context, item *RunGroupItem) error

func (*SQLiteStore) UpdateRun ¶

func (s *SQLiteStore) UpdateRun(ctx context.Context, run *Run) error

func (*SQLiteStore) UpdateRunIfMaterialStateMatches ¶

func (s *SQLiteStore) UpdateRunIfMaterialStateMatches(ctx context.Context, expected *Run, next *Run) (bool, error)

func (*SQLiteStore) UpdateSkillEvolutionCase ¶

func (s *SQLiteStore) UpdateSkillEvolutionCase(ctx context.Context, evolutionCase *SkillEvolutionCase) error

func (*SQLiteStore) UpdateSkillRevision ¶

func (s *SQLiteStore) UpdateSkillRevision(ctx context.Context, revision *SkillRevision) error

type ScoreVerdict ¶

// ScoreVerdict is the string-backed verdict stored in Scorecard.Verdict and
// JudgeEvaluationResult.Verdict.
type ScoreVerdict string

// Verdicts declared by this package.
const (
	ScoreVerdictPass    = ScoreVerdict("pass")
	ScoreVerdictFail    = ScoreVerdict("fail")
	ScoreVerdictPartial = ScoreVerdict("partial")
	ScoreVerdictError   = ScoreVerdict("error")
)

type Scorecard ¶

// Scorecard is one scoring record for a group item. SQLiteStore exposes
// AttachScorecard, ListScorecards (by group id) and LatestScorecardForItem
// (by group item id) for it, and RunGroupReport.Scorecards lists them.
//
// JSON notes (default encoding/json behavior): id, group_id, group_item_id,
// mode, verdict, score and created_at are always emitted, so a Score of 0 is
// written. run_id and the three *JSON string fields are omitempty. The *JSON
// fields are plain strings; judging by their names they hold serialized JSON
// documents, but this declaration does not enforce that.
type Scorecard struct {
	ID             string       `json:"id"`
	GroupID        string       `json:"group_id"`
	GroupItemID    string       `json:"group_item_id"`
	RunID          string       `json:"run_id,omitempty"`
	Mode           ScoringMode  `json:"mode"`
	Verdict        ScoreVerdict `json:"verdict"`
	Score          float64      `json:"score"`
	BreakdownJSON  string       `json:"breakdown_json,omitempty"`
	EvidenceJSON   string       `json:"evidence_json,omitempty"`
	JudgeTraceJSON string       `json:"judge_trace_json,omitempty"`
	CreatedAt      time.Time    `json:"created_at"`
}

type ScoringMode ¶

// ScoringMode is the string-backed scoring mode stored in Scorecard.Mode.
type ScoringMode string

// Scoring modes declared by this package.
const (
	ScoringModeRule   = ScoringMode("rule")
	ScoringModeJudge  = ScoringMode("judge")
	ScoringModeHybrid = ScoringMode("hybrid")
)

type SelectorCuratedAssets ¶

// SelectorCuratedAssets groups one Dataset, one DatasetVersion and one
// EvalSpec. All three are pointers tagged omitempty, so nil members are left
// out of the JSON encoding. The function that builds this value is not part
// of this excerpt.
type SelectorCuratedAssets struct {
	Dataset        *Dataset        `json:"dataset,omitempty"`
	DatasetVersion *DatasetVersion `json:"dataset_version,omitempty"`
	EvalSpec       *EvalSpec       `json:"eval_spec,omitempty"`
}

type SelectorGateCheck ¶

// SelectorGateCheck is the outcome of one named gate check: whether it
// passed, the actual and expected values (untyped, so any JSON-encodable
// value fits), and a free-form details map. name and passed are always
// emitted, so a failing check is written as "passed": false; actual,
// expected and details are omitempty and are dropped when nil/empty.
type SelectorGateCheck struct {
	Name     string                 `json:"name"`
	Passed   bool                   `json:"passed"`
	Actual   interface{}            `json:"actual,omitempty"`
	Expected interface{}            `json:"expected,omitempty"`
	Details  map[string]interface{} `json:"details,omitempty"`
}

type SelectorGateMetrics ¶

// SelectorGateMetrics is the aggregate metrics payload embedded in a
// SelectorGateReport (Metrics field).
//
// Fields ending in Count are ints and fields ending in Rate/Delta are
// float64. How each rate is derived from the counts is not visible in this
// declaration.
// NOTE(review): presumably each *Rate is the matching *Count divided by the
// relevant case count, and ClarifyRateDelta is target minus base — confirm.
type SelectorGateMetrics struct {
	// Overall and critical-case pass statistics; always emitted.
	CaseCount                         int                                   `json:"case_count"`
	PassedCount                       int                                   `json:"passed_count"`
	PassRate                          float64                               `json:"pass_rate"`
	CriticalCaseCount                 int                                   `json:"critical_case_count"`
	CriticalPassedCount               int                                   `json:"critical_passed_count"`
	CriticalPassRate                  float64                               `json:"critical_pass_rate"`
	// Route comparison statistics; always emitted.
	RouteCaseCount                    int                                   `json:"route_case_count"`
	RouteAgreementCount               int                                   `json:"route_agreement_count"`
	RouteAgreementRate                float64                               `json:"route_agreement_rate"`
	RouteCompatibleCount              int                                   `json:"route_compatible_count"`
	RouteCompatibleRate               float64                               `json:"route_compatible_rate"`
	RouteImprovementCount             int                                   `json:"route_improvement_count"`
	RouteDisagreementCount            int                                   `json:"route_disagreement_count"`
	CriticalRegressionCount           int                                   `json:"critical_regression_count"`
	// Clarify rates for the base and target sides; always emitted.
	BaseClarifyRate                   float64                               `json:"base_clarify_rate"`
	TargetClarifyRate                 float64                               `json:"target_clarify_rate"`
	ClarifyRateDelta                  float64                               `json:"clarify_rate_delta"`
	// Per-segment metrics keyed by a string; omitted from JSON when empty.
	// NOTE(review): keys are presumably locale and primary-route identifiers.
	LocaleBreakdown                   map[string]SelectorGateSegmentMetrics `json:"locale_breakdown,omitempty"`
	PrimaryRouteBreakdown             map[string]SelectorGateSegmentMetrics `json:"primary_route_breakdown,omitempty"`
	// String-keyed counters; omitted from JSON when empty.
	SelectedCanonicalSkillBreakdown   map[string]int                        `json:"selected_canonical_skill_breakdown,omitempty"`
	NativeSurfaceModeBreakdown        map[string]int                        `json:"native_surface_mode_breakdown,omitempty"`
	NativeSurfaceReasonBreakdown      map[string]int                        `json:"native_surface_reason_breakdown,omitempty"`
	ExecutionProfileBreakdown         map[string]int                        `json:"execution_profile_breakdown,omitempty"`
	// Tool-surface counters; omitted from JSON when zero.
	ToolSurfaceAliasRewriteCount      int                                   `json:"tool_surface_alias_rewrite_count,omitempty"`
	ToolSurfaceCacheInvalidationCount int                                   `json:"tool_surface_cache_invalidation_count,omitempty"`
	ToolSurfaceExecCutoverCount       int                                   `json:"tool_surface_exec_cutover_count,omitempty"`
}

type SelectorGateReport ¶

// SelectorGateReport is the JSON-serializable result of a selector gate
// evaluation. It identifies the target eval run and, optionally, the base
// eval run, baseline and comparison report, and carries the computed
// Metrics, the individual Checks and an overall Passed flag.
//
// Thresholds is a free-form map here, unlike the typed
// SelectorGateThresholds carried by SelectorGateRequest.
// NOTE(review): presumably it records the thresholds that were actually
// applied — confirm.
type SelectorGateReport struct {
	TargetEvalRunID    string                 `json:"target_eval_run_id"`
	BaseEvalRunID      string                 `json:"base_eval_run_id,omitempty"`
	BaselineID         string                 `json:"baseline_id,omitempty"`
	ComparisonReportID string                 `json:"comparison_report_id,omitempty"`
	Metrics            SelectorGateMetrics    `json:"metrics"`
	Thresholds         map[string]interface{} `json:"thresholds,omitempty"`
	Checks             []SelectorGateCheck    `json:"checks,omitempty"`
	Passed             bool                   `json:"passed"`
	CreatedAt          time.Time              `json:"created_at"`
}

type SelectorGateRequest ¶

// SelectorGateRequest is the JSON request payload for a selector gate
// evaluation. BaseEvalRunID and BaselineID are optional identifiers and
// Thresholds carries optional per-metric overrides.
//
// Thresholds is tagged omitzero rather than omitempty: encoding/json never
// considers a struct value "empty", so omitempty had no effect and an
// all-nil SelectorGateThresholds was always encoded as "thresholds":{}.
// omitzero (Go 1.24+) drops the key when every threshold is unset; older
// toolchains ignore the unknown option and keep the previous output.
// Decoding is unaffected.
type SelectorGateRequest struct {
	BaseEvalRunID string                 `json:"base_eval_run_id,omitempty"`
	BaselineID    string                 `json:"baseline_id,omitempty"`
	Thresholds    SelectorGateThresholds `json:"thresholds,omitzero"`
}

type SelectorGateSegmentMetrics ¶

// SelectorGateSegmentMetrics holds the metrics for one segment. It is the
// value type of the LocaleBreakdown and PrimaryRouteBreakdown maps in
// SelectorGateMetrics. All fields are always emitted in JSON (none is tagged
// omitempty).
// NOTE(review): the derivation of the *Rate and ClarifyRateDelta fields from
// the counts is not visible in this declaration — confirm with the producer.
type SelectorGateSegmentMetrics struct {
	CaseCount               int     `json:"case_count"`
	RouteAgreementCount     int     `json:"route_agreement_count"`
	RouteAgreementRate      float64 `json:"route_agreement_rate"`
	RouteCompatibleCount    int     `json:"route_compatible_count"`
	RouteCompatibleRate     float64 `json:"route_compatible_rate"`
	RouteImprovementCount   int     `json:"route_improvement_count"`
	RouteDisagreementCount  int     `json:"route_disagreement_count"`
	CriticalCaseCount       int     `json:"critical_case_count"`
	CriticalRegressionCount int     `json:"critical_regression_count"`
	BaseClarifyCount        int     `json:"base_clarify_count"`
	BaseClarifyRate         float64 `json:"base_clarify_rate"`
	TargetClarifyCount      int     `json:"target_clarify_count"`
	TargetClarifyRate       float64 `json:"target_clarify_rate"`
	ClarifyRateDelta        float64 `json:"clarify_rate_delta"`
}

type SelectorGateThresholds ¶

// SelectorGateThresholds lists the tunable limits for a selector gate.
//
// Every field is a pointer tagged omitempty, so a nil field is absent from
// the JSON encoding and an explicit zero can be told apart from "not set".
// DefaultSelectorGateThresholds returns a value of this type.
// NOTE(review): presumably Min* fields are lower bounds and Max* fields are
// upper bounds on the like-named SelectorGateMetrics values — confirm.
type SelectorGateThresholds struct {
	MinPassRate                *float64 `json:"min_pass_rate,omitempty"`
	MinCriticalPassRate        *float64 `json:"min_critical_pass_rate,omitempty"`
	MinRouteAgreementRate      *float64 `json:"min_route_agreement_rate,omitempty"`
	MinRouteCompatibleRate     *float64 `json:"min_route_compatible_rate,omitempty"`
	MaxClarifyRateDelta        *float64 `json:"max_clarify_rate_delta,omitempty"`
	MaxCriticalRegressionCount *int     `json:"max_critical_regression_count,omitempty"`
}

func DefaultSelectorGateThresholds ¶

func DefaultSelectorGateThresholds() SelectorGateThresholds

type SkillCutoverBudgetCheck ¶

// SkillCutoverBudgetCheck is one named pass/fail entry in the Checks list of
// a SkillCutoverBudgetReport. Its fields and JSON keys are identical to
// those of SelectorGateCheck.
//
// Actual and Expected are untyped (interface{}) and Details is a free-form
// map; all three are omitted from JSON when nil/empty.
type SkillCutoverBudgetCheck struct {
	Name     string                 `json:"name"`
	Passed   bool                   `json:"passed"`
	Actual   interface{}            `json:"actual,omitempty"`
	Expected interface{}            `json:"expected,omitempty"`
	Details  map[string]interface{} `json:"details,omitempty"`
}

type SkillCutoverBudgetMetrics ¶

// SkillCutoverBudgetMetrics is the aggregate metrics payload embedded in a
// SkillCutoverBudgetReport (Metrics field). It records case counts, base and
// target medians for tool count, schema bytes and latency (in milliseconds,
// per the field names), allowed-native-tool statistics, and the same
// breakdown maps and tool-surface counters found on SelectorGateMetrics.
// NOTE(review): how the reduction/increase rates are computed from the
// medians is not visible in this declaration — confirm with the producer.
type SkillCutoverBudgetMetrics struct {
	CaseCount                         int            `json:"case_count"`
	ComparableCaseCount               int            `json:"comparable_case_count"`
	MissingSurfaceCaseCount           int            `json:"missing_surface_case_count"`
	BaseMedianToolCount               float64        `json:"base_median_tool_count"`
	TargetMedianToolCount             float64        `json:"target_median_tool_count"`
	BaseMedianSchemaBytes             float64        `json:"base_median_schema_bytes"`
	TargetMedianSchemaBytes           float64        `json:"target_median_schema_bytes"`
	MedianSchemaByteReductionRate     float64        `json:"median_schema_byte_reduction_rate"`
	BaseMedianLatencyMs               float64        `json:"base_median_latency_ms"`
	TargetMedianLatencyMs             float64        `json:"target_median_latency_ms"`
	MedianLatencyIncreaseRate         float64        `json:"median_latency_increase_rate"`
	// Omitted from JSON when the slice is empty.
	AllowedFinalNativeTools           []string       `json:"allowed_final_native_tools,omitempty"`
	AllowedFinalNativeToolCases       int            `json:"allowed_final_native_tool_cases"`
	AllowedFinalNativeToolCaseRate    float64        `json:"allowed_final_native_tool_case_rate"`
	NonAllowedNativeToolCaseCount     int            `json:"non_allowed_native_tool_case_count"`
	// String-keyed counters; omitted from JSON when empty.
	SelectedCanonicalSkillBreakdown   map[string]int `json:"selected_canonical_skill_breakdown,omitempty"`
	NativeSurfaceModeBreakdown        map[string]int `json:"native_surface_mode_breakdown,omitempty"`
	NativeSurfaceReasonBreakdown      map[string]int `json:"native_surface_reason_breakdown,omitempty"`
	ExecutionProfileBreakdown         map[string]int `json:"execution_profile_breakdown,omitempty"`
	// Tool-surface counters; omitted from JSON when zero.
	ToolSurfaceAliasRewriteCount      int            `json:"tool_surface_alias_rewrite_count,omitempty"`
	ToolSurfaceCacheInvalidationCount int            `json:"tool_surface_cache_invalidation_count,omitempty"`
	ToolSurfaceExecCutoverCount       int            `json:"tool_surface_exec_cutover_count,omitempty"`
}

type SkillCutoverBudgetReport ¶

// SkillCutoverBudgetReport is the JSON-serializable result of a skill
// cutover budget evaluation. It identifies the target eval run and,
// optionally, the base eval run and baseline, and carries the computed
// Metrics, the individual Checks and an overall Passed flag.
//
// Thresholds is a free-form map here, unlike the typed
// SkillCutoverBudgetThresholds carried by SkillCutoverBudgetRequest.
type SkillCutoverBudgetReport struct {
	TargetEvalRunID string                    `json:"target_eval_run_id"`
	BaseEvalRunID   string                    `json:"base_eval_run_id,omitempty"`
	BaselineID      string                    `json:"baseline_id,omitempty"`
	Metrics         SkillCutoverBudgetMetrics `json:"metrics"`
	Thresholds      map[string]interface{}    `json:"thresholds,omitempty"`
	Checks          []SkillCutoverBudgetCheck `json:"checks,omitempty"`
	Passed          bool                      `json:"passed"`
	CreatedAt       time.Time                 `json:"created_at"`
}

type SkillCutoverBudgetRequest ¶

// SkillCutoverBudgetRequest is the JSON request payload for a skill cutover
// budget evaluation. BaseEvalRunID and BaselineID are optional identifiers
// and Thresholds carries optional overrides.
//
// Thresholds is tagged omitzero rather than omitempty: encoding/json never
// considers a struct value "empty", so omitempty had no effect and an unset
// SkillCutoverBudgetThresholds was always encoded as "thresholds":{}.
// omitzero (Go 1.24+) drops the key when the struct is its zero value; older
// toolchains ignore the unknown option and keep the previous output.
// Decoding is unaffected.
type SkillCutoverBudgetRequest struct {
	BaseEvalRunID string                       `json:"base_eval_run_id,omitempty"`
	BaselineID    string                       `json:"baseline_id,omitempty"`
	Thresholds    SkillCutoverBudgetThresholds `json:"thresholds,omitzero"`
}

type SkillCutoverBudgetThresholds ¶

// SkillCutoverBudgetThresholds lists the tunable limits for a skill cutover
// budget evaluation. The two rate limits are pointers, so nil means "not
// set" and is omitted from JSON; AllowedFinalNativeTools is omitted when
// empty.
// NOTE(review): presumably the rates bound the like-named fields of
// SkillCutoverBudgetMetrics — confirm.
type SkillCutoverBudgetThresholds struct {
	MinMedianSchemaByteReductionRate *float64 `json:"min_median_schema_byte_reduction_rate,omitempty"`
	MaxMedianLatencyIncreaseRate     *float64 `json:"max_median_latency_increase_rate,omitempty"`
	AllowedFinalNativeTools          []string `json:"allowed_final_native_tools,omitempty"`
}

type SkillCutoverLaneAssessment ¶

// SkillCutoverLaneAssessment is one entry in the Assessments list of a
// SkillCutoverLaneReadiness. It records the eval run it refers to, whether
// it passed, optional base/baseline/comparison identifiers, and the names
// of any failed checks.
type SkillCutoverLaneAssessment struct {
	EvalRunID          string    `json:"eval_run_id"`
	CandidateID        string    `json:"candidate_id,omitempty"`
	Title              string    `json:"title,omitempty"`
	CreatedAt          time.Time `json:"created_at"`
	Passed             bool      `json:"passed"`
	BaseEvalRunID      string    `json:"base_eval_run_id,omitempty"`
	BaselineID         string    `json:"baseline_id,omitempty"`
	ComparisonReportID string    `json:"comparison_report_id,omitempty"`
	FailedChecks       []string  `json:"failed_checks,omitempty"`
}

type SkillCutoverLaneReadiness ¶

// SkillCutoverLaneReadiness is the readiness summary for one lane of a
// SkillCutoverReadinessReport, which uses it for its Selector, Execution
// and Budget fields.
//
// It reports the required number of consecutive runs alongside the observed
// run and consecutive-pass counts, a Ready flag, an optional Error string
// and the individual Assessments.
// NOTE(review): presumably Ready means ConsecutivePassCount has reached
// RequiredConsecutiveRuns — the rule is not visible here; confirm.
type SkillCutoverLaneReadiness struct {
	EvalSpecID              string                       `json:"eval_spec_id,omitempty"`
	CandidateID             string                       `json:"candidate_id,omitempty"`
	RequiredConsecutiveRuns int                          `json:"required_consecutive_runs"`
	CandidateRunCount       int                          `json:"candidate_run_count"`
	ConsecutivePassCount    int                          `json:"consecutive_pass_count"`
	Ready                   bool                         `json:"ready"`
	BaseEvalRunID           string                       `json:"base_eval_run_id,omitempty"`
	BaselineID              string                       `json:"baseline_id,omitempty"`
	Error                   string                       `json:"error,omitempty"`
	Assessments             []SkillCutoverLaneAssessment `json:"assessments,omitempty"`
}

type SkillCutoverReadinessReport ¶

// SkillCutoverReadinessReport is the JSON-serializable cutover readiness
// result. It combines three SkillCutoverLaneReadiness values (Selector,
// Execution, Budget) with two overall flags and explanatory string lists.
// NOTE(review): the difference between EvaluatedGatesReady and Ready is not
// visible here; presumably Ready also accounts for UnverifiedRequirements
// and BlockingReasons — confirm.
type SkillCutoverReadinessReport struct {
	CandidateID             string                    `json:"candidate_id,omitempty"`
	RequiredConsecutiveRuns int                       `json:"required_consecutive_runs"`
	EvaluatedGatesReady     bool                      `json:"evaluated_gates_ready"`
	Ready                   bool                      `json:"ready"`
	Selector                SkillCutoverLaneReadiness `json:"selector"`
	Execution               SkillCutoverLaneReadiness `json:"execution"`
	Budget                  SkillCutoverLaneReadiness `json:"budget"`
	UnverifiedRequirements  []string                  `json:"unverified_requirements,omitempty"`
	BlockingReasons         []string                  `json:"blocking_reasons,omitempty"`
	CreatedAt               time.Time                 `json:"created_at"`
}

type SkillCutoverReadinessRequest ¶

// SkillCutoverReadinessRequest is the JSON request payload for a cutover
// readiness evaluation. The string and int fields are optional (omitted
// when empty/zero); Selector, Execution and Budget carry the per-lane
// request options.
//
// The three nested request structs are tagged omitzero rather than
// omitempty: encoding/json never considers a struct value "empty", so
// omitempty had no effect and unset lanes were always encoded as "{}".
// omitzero (Go 1.24+) drops a lane's key when it is the zero value; older
// toolchains ignore the unknown option and keep the previous output.
// Decoding is unaffected.
type SkillCutoverReadinessRequest struct {
	OwnerUserID             string                      `json:"owner_user_id,omitempty"`
	CandidateID             string                      `json:"candidate_id,omitempty"`
	SelectorEvalSpecID      string                      `json:"selector_eval_spec_id,omitempty"`
	ExecutionEvalSpecID     string                      `json:"execution_eval_spec_id,omitempty"`
	RequiredConsecutiveRuns int                         `json:"required_consecutive_runs,omitempty"`
	MaxAssessments          int                         `json:"max_assessments,omitempty"`
	Selector                SelectorGateRequest         `json:"selector,omitzero"`
	Execution               ExecutionEquivalenceRequest `json:"execution,omitzero"`
	Budget                  SkillCutoverBudgetRequest   `json:"budget,omitzero"`
}

type SkillDecisionHistoryEntry ¶

// SkillDecisionHistoryEntry is one row of skill decision history.
//
// It repeats the identifying, lineage and review fields of SkillRevision
// (without Content and ContentSHA256) and adds DecisionAt plus DecisionLog,
// a map alongside the DecisionLogJSON string.
// NOTE(review): presumably DecisionLog is the decoded form of
// DecisionLogJSON — confirm against the code that builds the entry.
type SkillDecisionHistoryEntry struct {
	RevisionID          string                      `json:"revision_id"`
	SkillID             string                      `json:"skill_id"`
	Status              SkillRevisionStatus         `json:"status"`
	SourcePath          string                      `json:"source_path,omitempty"`
	CandidateID         string                      `json:"candidate_id,omitempty"`
	BaseContentSHA256   string                      `json:"base_content_sha256,omitempty"`
	OriginCaseID        string                      `json:"origin_case_id,omitempty"`
	ParentRevisionID    string                      `json:"parent_revision_id,omitempty"`
	BackupOfRevisionID  string                      `json:"backup_of_revision_id,omitempty"`
	EvalRunID           string                      `json:"eval_run_id,omitempty"`
	OptimizationRunID   string                      `json:"optimization_run_id,omitempty"`
	FollowupGate        string                      `json:"followup_gate,omitempty"`
	OptimizationSurface OptimizationSurface         `json:"optimization_surface,omitempty"`
	DecisionAction      SkillRevisionDecisionAction `json:"decision_action,omitempty"`
	ReviewNote          string                      `json:"review_note,omitempty"`
	ReviewedBy          string                      `json:"reviewed_by,omitempty"`
	DecisionLog         map[string]interface{}      `json:"decision_log,omitempty"`
	DecisionLogJSON     string                      `json:"decision_log_json,omitempty"`
	// DecisionAt and CreatedAt are always emitted; the pointer timestamps
	// below are omitted from JSON when nil.
	DecisionAt          time.Time                   `json:"decision_at"`
	CreatedAt           time.Time                   `json:"created_at"`
	ReviewedAt          *time.Time                  `json:"reviewed_at,omitempty"`
	PromotedAt          *time.Time                  `json:"promoted_at,omitempty"`
}

type SkillDecisionHistoryFilter ¶

// SkillDecisionHistoryFilter holds optional query criteria for skill
// decision history: a skill ID, a set of decision actions and a limit. All
// fields are omitted from JSON when empty/zero.
// NOTE(review): how a zero Limit or empty Actions is interpreted is not
// visible here — confirm against the query implementation.
type SkillDecisionHistoryFilter struct {
	SkillID string                        `json:"skill_id,omitempty"`
	Actions []SkillRevisionDecisionAction `json:"actions,omitempty"`
	Limit   int                           `json:"limit,omitempty"`
}

type SkillEvolutionCase ¶

// SkillEvolutionCase is the JSON-serializable record of a skill evolution
// case. It carries the case ID and skill ID, a mode, reason and status,
// optional source/candidate/revision identifiers, and creation and update
// timestamps.
// NOTE(review): DedupKey and FailureSignature are presumably used to
// collapse duplicate cases, and EvidenceJSON presumably holds a pre-encoded
// JSON document — neither is visible here; confirm.
type SkillEvolutionCase struct {
	ID                string                   `json:"id"`
	SkillID           string                   `json:"skill_id"`
	OwnerUserID       string                   `json:"owner_user_id,omitempty"`
	Mode              SkillEvolutionMode       `json:"mode"`
	Reason            SkillEvolutionReason     `json:"reason"`
	SourceKind        string                   `json:"source_kind,omitempty"`
	SourceID          string                   `json:"source_id,omitempty"`
	CandidateID       string                   `json:"candidate_id,omitempty"`
	BaseContentSHA256 string                   `json:"base_content_sha256,omitempty"`
	FailureSignature  string                   `json:"failure_signature,omitempty"`
	DedupKey          string                   `json:"dedup_key,omitempty"`
	Summary           string                   `json:"summary,omitempty"`
	EvidenceJSON      string                   `json:"evidence_json,omitempty"`
	RevisionID        string                   `json:"revision_id,omitempty"`
	Status            SkillEvolutionCaseStatus `json:"status"`
	SkippedReason     string                   `json:"skipped_reason,omitempty"`
	CreatedAt         time.Time                `json:"created_at"`
	UpdatedAt         time.Time                `json:"updated_at"`
}

type SkillEvolutionCaseDetail ¶

// SkillEvolutionCaseDetail extends SkillEvolutionCase with optional linked
// records: a SkillRevision, a source Run, and source and linked EvalRuns,
// each paired with its ID string.
type SkillEvolutionCaseDetail struct {
	// Embedded without a tag, so encoding/json emits the case's fields at
	// the top level of the same JSON object rather than under a nested key.
	SkillEvolutionCase
	LinkedRevision  *SkillRevision `json:"linked_revision,omitempty"`
	SourceRunID     string         `json:"source_run_id,omitempty"`
	SourceRun       *Run           `json:"source_run,omitempty"`
	SourceEvalRunID string         `json:"source_eval_run_id,omitempty"`
	SourceEvalRun   *EvalRun       `json:"source_eval_run,omitempty"`
	LinkedEvalRunID string         `json:"linked_eval_run_id,omitempty"`
	LinkedEvalRun   *EvalRun       `json:"linked_eval_run,omitempty"`
}

type SkillEvolutionCaseFilter ¶

// SkillEvolutionCaseFilter holds optional query criteria for skill evolution
// cases: skill, owner, candidate and revision IDs, a set of statuses and a
// limit. All fields are omitted from JSON when empty/zero.
// NOTE(review): how empty fields and a zero Limit are interpreted is not
// visible here — confirm against the query implementation.
type SkillEvolutionCaseFilter struct {
	SkillID     string                     `json:"skill_id,omitempty"`
	OwnerUserID string                     `json:"owner_user_id,omitempty"`
	CandidateID string                     `json:"candidate_id,omitempty"`
	RevisionID  string                     `json:"revision_id,omitempty"`
	Statuses    []SkillEvolutionCaseStatus `json:"statuses,omitempty"`
	Limit       int                        `json:"limit,omitempty"`
}

type SkillEvolutionCaseSpec ¶

// SkillEvolutionCaseSpec carries a subset of the SkillEvolutionCase fields.
// It has no DedupKey, RevisionID, Status, SkippedReason or timestamps, and
// its ID is optional (omitted from JSON when empty).
// NOTE(review): presumably this is the input from which a
// SkillEvolutionCase is created, with the missing fields filled in by the
// store — confirm.
type SkillEvolutionCaseSpec struct {
	ID                string               `json:"id,omitempty"`
	SkillID           string               `json:"skill_id"`
	OwnerUserID       string               `json:"owner_user_id,omitempty"`
	Mode              SkillEvolutionMode   `json:"mode"`
	Reason            SkillEvolutionReason `json:"reason"`
	SourceKind        string               `json:"source_kind,omitempty"`
	SourceID          string               `json:"source_id,omitempty"`
	CandidateID       string               `json:"candidate_id,omitempty"`
	BaseContentSHA256 string               `json:"base_content_sha256,omitempty"`
	FailureSignature  string               `json:"failure_signature,omitempty"`
	Summary           string               `json:"summary,omitempty"`
	EvidenceJSON      string               `json:"evidence_json,omitempty"`
}

type SkillEvolutionCaseStatus ¶

// SkillEvolutionCaseStatus is a string-typed label stored in the Status
// field of a SkillEvolutionCase.
type SkillEvolutionCaseStatus string

// Case statuses declared by this package. Each constant's wire value is the
// snake_case string literal on its right-hand side.
// NOTE(review): the allowed transitions between statuses are not visible
// here — confirm against the code that updates cases.
const (
	SkillEvolutionCaseStatusOpen             SkillEvolutionCaseStatus = "open"
	SkillEvolutionCaseStatusCandidateCreated SkillEvolutionCaseStatus = "candidate_created"
	SkillEvolutionCaseStatusAccepted         SkillEvolutionCaseStatus = "accepted"
	SkillEvolutionCaseStatusRejected         SkillEvolutionCaseStatus = "rejected"
	SkillEvolutionCaseStatusPromoted         SkillEvolutionCaseStatus = "promoted"
	SkillEvolutionCaseStatusSkipped          SkillEvolutionCaseStatus = "skipped"
)

type SkillEvolutionMode ¶

// SkillEvolutionMode is a string-typed label stored in the Mode field of
// SkillEvolutionCase and SkillEvolutionCaseSpec.
type SkillEvolutionMode string

// Evolution modes declared by this package.
const (
	SkillEvolutionModeFix     SkillEvolutionMode = "fix"
	SkillEvolutionModeCapture SkillEvolutionMode = "capture"
)

type SkillEvolutionReason ¶

// SkillEvolutionReason is a string-typed label stored in the Reason field of
// SkillEvolutionCase and SkillEvolutionCaseSpec.
type SkillEvolutionReason string

// Evolution reasons declared by this package. Each constant's wire value is
// the snake_case string literal on its right-hand side.
const (
	SkillEvolutionReasonRuntimeFailure      SkillEvolutionReason = "runtime_failure"
	SkillEvolutionReasonRuntimeCapture      SkillEvolutionReason = "runtime_capture"
	SkillEvolutionReasonSelectorGateFailed  SkillEvolutionReason = "selector_gate_failed"
	SkillEvolutionReasonExecutionGateFailed SkillEvolutionReason = "execution_gate_failed"
	SkillEvolutionReasonBudgetGateFailed    SkillEvolutionReason = "budget_gate_failed"
	SkillEvolutionReasonManual              SkillEvolutionReason = "manual"
)

type SkillOptimizeRequest ¶

// SkillOptimizeRequest is the JSON request payload for a skill optimization.
// EvalRunID is always emitted; CandidateID and SourcePath are optional and
// omitted from JSON when empty.
type SkillOptimizeRequest struct {
	EvalRunID   string `json:"eval_run_id"`
	CandidateID string `json:"candidate_id,omitempty"`
	SourcePath  string `json:"source_path,omitempty"`
}

type SkillPromoteResult ¶

// SkillPromoteResult is the JSON result of a skill promotion. It reports the
// promoted revision's ID, the backup revision's ID and the source path that
// was written. All three keys are always emitted.
type SkillPromoteResult struct {
	PromotedRevisionID string `json:"promoted_revision_id"`
	BackupRevisionID   string `json:"backup_revision_id"`
	WrittenSourcePath  string `json:"written_source_path"`
}

type SkillRevision ¶

// SkillRevision is the JSON-serializable record of one revision of a skill.
// It carries the revision and skill IDs, a status, lineage identifiers
// (origin case, parent revision, the revision it backs up), eval and
// optimization run IDs, review metadata, the revision Content with its
// SHA-256 string, and timestamps.
// NOTE(review): DecisionLogJSON presumably holds a pre-encoded JSON
// document, and ContentSHA256 presumably is the hex digest of Content —
// neither is visible here; confirm.
type SkillRevision struct {
	ID                  string                      `json:"id"`
	SkillID             string                      `json:"skill_id"`
	Status              SkillRevisionStatus         `json:"status"`
	SourcePath          string                      `json:"source_path,omitempty"`
	CandidateID         string                      `json:"candidate_id,omitempty"`
	BaseContentSHA256   string                      `json:"base_content_sha256,omitempty"`
	OriginCaseID        string                      `json:"origin_case_id,omitempty"`
	ParentRevisionID    string                      `json:"parent_revision_id,omitempty"`
	BackupOfRevisionID  string                      `json:"backup_of_revision_id,omitempty"`
	EvalRunID           string                      `json:"eval_run_id,omitempty"`
	OptimizationRunID   string                      `json:"optimization_run_id,omitempty"`
	FollowupGate        string                      `json:"followup_gate,omitempty"`
	OptimizationSurface OptimizationSurface         `json:"optimization_surface,omitempty"`
	DecisionAction      SkillRevisionDecisionAction `json:"decision_action,omitempty"`
	ReviewNote          string                      `json:"review_note,omitempty"`
	ReviewedBy          string                      `json:"reviewed_by,omitempty"`
	DecisionLogJSON     string                      `json:"decision_log_json,omitempty"`
	Content             string                      `json:"content,omitempty"`
	ContentSHA256       string                      `json:"content_sha256,omitempty"`
	// CreatedAt is always emitted; the pointer timestamps below are omitted
	// from JSON when nil.
	CreatedAt           time.Time                   `json:"created_at"`
	ReviewedAt          *time.Time                  `json:"reviewed_at,omitempty"`
	PromotedAt          *time.Time                  `json:"promoted_at,omitempty"`
}

type SkillRevisionDecisionAction ¶

// SkillRevisionDecisionAction is a string-typed label stored in the
// DecisionAction field of SkillRevision and SkillDecisionHistoryEntry, and
// used as a filter value in SkillDecisionHistoryFilter.
type SkillRevisionDecisionAction string

// Decision actions declared by this package.
const (
	SkillRevisionDecisionActionPromote  SkillRevisionDecisionAction = "promote"
	SkillRevisionDecisionActionRollback SkillRevisionDecisionAction = "rollback"
)

type SkillRevisionDecisionRequest ¶

// SkillRevisionDecisionRequest is the JSON request payload for a decision on
// a skill revision.
type SkillRevisionDecisionRequest struct {
	ReviewNote string `json:"review_note,omitempty"`
	// ReviewedBy is tagged json:"-", so encoding/json neither reads it from
	// nor writes it to JSON; it must be set in Go code.
	// NOTE(review): presumably filled in server-side from the authenticated
	// caller — confirm against the handler.
	ReviewedBy string `json:"-"`
}

type SkillRevisionFilter ¶

// SkillRevisionFilter holds optional query criteria for skill revisions: a
// skill ID, a set of statuses and a limit. All fields are omitted from JSON
// when empty/zero.
// NOTE(review): how empty fields and a zero Limit are interpreted is not
// visible here — confirm against the query implementation.
type SkillRevisionFilter struct {
	SkillID  string                `json:"skill_id,omitempty"`
	Statuses []SkillRevisionStatus `json:"statuses,omitempty"`
	Limit    int                   `json:"limit,omitempty"`
}

type SkillRevisionPromotionRecorder ¶

// SkillRevisionPromotionRecorder is implemented by anything that can record
// a skill revision promotion.
type SkillRevisionPromotionRecorder interface {
	// RecordSkillRevisionPromotion receives the promoted revision, the
	// backup revision and the source path that was written, and returns an
	// error on failure.
	// NOTE(review): whether either revision pointer may be nil is not
	// specified by the signature — confirm with the implementations.
	RecordSkillRevisionPromotion(ctx context.Context, promotedRevision *SkillRevision, backupRevision *SkillRevision, writtenSourcePath string) error
}

type SkillRevisionStatus ¶

// SkillRevisionStatus is a string-typed label stored in the Status field of
// SkillRevision and SkillDecisionHistoryEntry, and used as a filter value in
// SkillRevisionFilter.
type SkillRevisionStatus string

// Revision statuses declared by this package.
// NOTE(review): the allowed transitions between statuses are not visible
// here — confirm against the code that updates revisions.
const (
	SkillRevisionStatusCandidate SkillRevisionStatus = "candidate"
	SkillRevisionStatusAccepted  SkillRevisionStatus = "accepted"
	SkillRevisionStatusRejected  SkillRevisionStatus = "rejected"
	SkillRevisionStatusPromoted  SkillRevisionStatus = "promoted"
	SkillRevisionStatusBackup    SkillRevisionStatus = "backup"
)

type SnapshotDriver ¶

// SnapshotDriver is a single-method interface for synchronizing a Run.
type SnapshotDriver interface {
	// Sync takes a run and returns a run plus an error.
	// NOTE(review): whether the returned *Run is the same pointer, a
	// refreshed copy, or nil on error is not specified by the signature —
	// confirm with the implementations.
	Sync(ctx context.Context, run *Run) (*Run, error)
}

type Store ¶

// Store declares the persistence operations for runs, their events and
// their artifacts. Every method takes a context as its first argument and
// reports failure through an error return.
// NOTE(review): not-found behaviour for GetRun, result ordering for the List
// methods, and the meaning of a zero or negative limit are not specified by
// the signatures — confirm with the implementations.
type Store interface {
	// Run records.
	CreateRun(ctx context.Context, run *Run) error
	UpdateRun(ctx context.Context, run *Run) error
	GetRun(ctx context.Context, id string) (*Run, error)
	ListRuns(ctx context.Context, filter RunFilter) ([]Run, error)
	// Events belonging to a run.
	AppendEvent(ctx context.Context, event RunEvent) error
	ListEvents(ctx context.Context, runID string, limit int) ([]RunEvent, error)
	// Artifact references belonging to a run.
	AttachArtifact(ctx context.Context, ref ArtifactRef) error
	ListArtifacts(ctx context.Context, runID string) ([]ArtifactRef, error)
}

type UserTaskActions ¶

// UserTaskActions wraps the list of ActionDescriptor values attached to a
// UserTaskProjection (Actions field). Items is omitted from JSON when empty.
type UserTaskActions struct {
	Items []ActionDescriptor `json:"items,omitempty"`
}

type UserTaskArtifact ¶

// UserTaskArtifact is one entry in the Artifacts list of a
// UserTaskProjection: a kind, a label and an optional URL (omitted from JSON
// when empty).
type UserTaskArtifact struct {
	Kind  string `json:"kind"`
	Label string `json:"label"`
	URL   string `json:"url,omitempty"`
}

type UserTaskBlocker ¶

// UserTaskBlocker is the optional Blocker value of a UserTaskProjection: a
// kind, a label, a pending count and a modal-only flag. All four keys are
// always emitted.
// NOTE(review): presumably it describes what a task is waiting on, and
// ModalOnly tells the UI to resolve it in a modal — confirm with the
// consumer.
type UserTaskBlocker struct {
	Kind         string `json:"kind"`
	Label        string `json:"label"`
	PendingCount int    `json:"pending_count"`
	ModalOnly    bool   `json:"modal_only"`
}

type UserTaskProjection ¶

// UserTaskProjection is the JSON view of a task, as returned by
// UserTaskProjectionService.Get and List.
//
// ID, Kind, Scope, Title, Status, Stage, Progress, Actions and UpdatedAt are
// always emitted. Every other field is optional and omitted from JSON when
// empty or nil.
// NOTE(review): the value range of Progress (e.g. 0–100) and the vocabulary
// of Status, Stage and Scope are not visible here — confirm with the
// producer.
type UserTaskProjection struct {
	ID                 string                   `json:"id"`
	Kind               RunKind                  `json:"kind"`
	ConversationID     string                   `json:"conversation_id,omitempty"`
	Scope              string                   `json:"scope"`
	Title              string                   `json:"title"`
	Subtitle           string                   `json:"subtitle,omitempty"`
	Status             string                   `json:"status"`
	Stage              string                   `json:"stage"`
	Progress           int                      `json:"progress"`
	Blocker            *UserTaskBlocker         `json:"blocker,omitempty"`
	ResultPreview      string                   `json:"result_preview,omitempty"`
	ErrorPreview       string                   `json:"error_preview,omitempty"`
	Artifacts          []UserTaskArtifact       `json:"artifacts,omitempty"`
	ResearchSources    []UserTaskResearchSource `json:"research_sources,omitempty"`
	SubagentSummary    *UserTaskSubagentSummary `json:"subagent_summary,omitempty"`
	Actions            UserTaskActions          `json:"actions"`
	RunStatus          string                   `json:"run_status,omitempty"`
	VerificationStatus string                   `json:"verification_status,omitempty"`
	// Pointers, so an explicit zero score or count is still emitted while
	// nil is omitted.
	Score              *float64                 `json:"score,omitempty"`
	EvidenceCount      *int                     `json:"evidence_count,omitempty"`
	DetailHref         string                   `json:"detail_href,omitempty"`
	UpdatedAt          time.Time                `json:"updated_at"`
	FinishedAt         *time.Time               `json:"finished_at,omitempty"`
}

type UserTaskProjectionFilter ¶

// UserTaskProjectionFilter is the argument to UserTaskProjectionService.List.
// It carries a user ID, a conversation ID, a scope and a limit, and has no
// JSON tags.
// NOTE(review): how empty fields and a zero Limit are interpreted is not
// visible here — confirm against List.
type UserTaskProjectionFilter struct {
	UserID         string
	ConversationID string
	Scope          string
	Limit          int
}

type UserTaskProjectionHandler ¶

// UserTaskProjectionHandler exposes echo handler methods (GetTask,
// CancelTask, ResumeTask, PerformTaskAction) and a RegisterRoutes method
// that takes an *echo.Group. Construct it with NewUserTaskProjectionHandler;
// its fields are unexported.
type UserTaskProjectionHandler struct {
	// contains filtered or unexported fields
}

func NewUserTaskProjectionHandler ¶

func NewUserTaskProjectionHandler(manager *Controller, detailProvider RunDetailProvider) *UserTaskProjectionHandler

func (*UserTaskProjectionHandler) CancelTask ¶

func (h *UserTaskProjectionHandler) CancelTask(c echo.Context) error

func (*UserTaskProjectionHandler) GetTask ¶

func (h *UserTaskProjectionHandler) GetTask(c echo.Context) error

func (*UserTaskProjectionHandler) PerformTaskAction ¶

func (h *UserTaskProjectionHandler) PerformTaskAction(c echo.Context) error

func (*UserTaskProjectionHandler) RegisterRoutes ¶

func (h *UserTaskProjectionHandler) RegisterRoutes(g *echo.Group)

func (*UserTaskProjectionHandler) ResumeTask ¶

func (h *UserTaskProjectionHandler) ResumeTask(c echo.Context) error

type UserTaskProjectionService ¶

// UserTaskProjectionService returns UserTaskProjection values through its
// Get (by run ID and scope) and List (by UserTaskProjectionFilter) methods.
// Construct it with NewUserTaskProjectionService; its fields are unexported.
type UserTaskProjectionService struct {
	// contains filtered or unexported fields
}

func NewUserTaskProjectionService ¶

func NewUserTaskProjectionService(manager *Controller, detailProvider RunDetailProvider) *UserTaskProjectionService

func (*UserTaskProjectionService) Get ¶

func (s *UserTaskProjectionService) Get(ctx context.Context, runID string, scope string) (*UserTaskProjection, error)

func (*UserTaskProjectionService) List ¶

func (s *UserTaskProjectionService) List(ctx context.Context, filter UserTaskProjectionFilter) ([]UserTaskProjection, error)

type UserTaskResearchSource ¶

// UserTaskResearchSource is one entry in the ResearchSources list of a
// UserTaskProjection. Title is always emitted; every other field is omitted
// from JSON when empty or zero.
//
// PublishedAt and FetchedAt are plain strings, not time.Time.
// NOTE(review): their timestamp format and the range of the two scores are
// not visible here — confirm with the producer. Because of omitempty, a
// score of exactly 0 is indistinguishable from "not set" on the wire.
type UserTaskResearchSource struct {
	Title            string  `json:"title"`
	URL              string  `json:"url,omitempty"`
	Domain           string  `json:"domain,omitempty"`
	SourceType       string  `json:"source_type,omitempty"`
	PublishedAt      string  `json:"published_at,omitempty"`
	FetchedAt        string  `json:"fetched_at,omitempty"`
	RelevanceScore   float64 `json:"relevance_score,omitempty"`
	CredibilityScore float64 `json:"credibility_score,omitempty"`
}

type UserTaskSubagentSummary ¶

// UserTaskSubagentSummary is the JSON shape of the subagent roll-up attached
// to a UserTaskProjection (SubagentSummary field).
//
// Total is always emitted. The per-state counters and the Latest* strings
// are omitted from JSON when zero/empty.
//
// LatestUpdatedAt is tagged omitzero rather than omitempty: encoding/json
// never considers a struct value such as time.Time "empty", so omitempty had
// no effect and a summary with no latest subagent was encoded with
// "latest_updated_at":"0001-01-01T00:00:00Z". omitzero (Go 1.24+) drops the
// key when the timestamp is the zero time; older toolchains ignore the
// unknown option and keep the previous output. Decoding is unaffected.
type UserTaskSubagentSummary struct {
	Total           int       `json:"total"`
	Running         int       `json:"running,omitempty"`
	WaitingUser     int       `json:"waiting_user,omitempty"`
	Completed       int       `json:"completed,omitempty"`
	Failed          int       `json:"failed,omitempty"`
	Cancelled       int       `json:"cancelled,omitempty"`
	LatestTitle     string    `json:"latest_title,omitempty"`
	LatestStatus    string    `json:"latest_status,omitempty"`
	LatestUpdatedAt time.Time `json:"latest_updated_at,omitzero"`
}

type WritePathGuard ¶

// WritePathGuard exposes CheckWritePath, which takes a context and an
// absolute path and returns an error. Construct it with NewWritePathGuard;
// its fields are unexported.
// NOTE(review): the conditions under which a path is rejected are not
// visible here — confirm against CheckWritePath.
type WritePathGuard struct {
	// contains filtered or unexported fields
}

func NewWritePathGuard ¶

func NewWritePathGuard(manager *Controller) *WritePathGuard

func (*WritePathGuard) CheckWritePath ¶

func (g *WritePathGuard) CheckWritePath(ctx context.Context, absPath string) error

Source Files ¶

View all Source files

Directories ¶

Path	Synopsis
drivers

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL