Documentation ¶
Index ¶
- Constants
- func ApplyHarnessContractToExpected(expected map[string]interface{}, contract HarnessContract) map[string]interface{}
- func Batch1ExecutionCaseCount() int
- func BuildHarnessCheckpointContext(raw map[string]interface{}) string
- func BuildHarnessContractContext(contract HarnessContract) string
- func DefaultDatasetBundleGitHubPathForRepo(owner, repo string) string
- func DesktopChatSuccessRateCaseCount() int
- func HarnessAdaptivePolicyMetadata(policy HarnessAdaptivePolicy) map[string]interface{}
- func HarnessContractFallbackPlan(contract HarnessContract) []string
- func HarnessContractMetadata(contract HarnessContract) map[string]interface{}
- func HarnessContractSuccessCriteria(contract HarnessContract) []string
- func LegacyStoreDBPath(dataDir string) string
- func NewRuntimeEventObserverMux(observers ...tools.RuntimeEventObserver) tools.RuntimeEventObserver
- func SelectorCuratedCaseCount() int
- func WithRunContext(ctx context.Context, runCtx *RunContext) context.Context
- type ActionDescriptor
- type ActionDriver
- type ActionInputDescriptor
- type ActionInputFieldDescriptor
- type AgentCompatHandler
- func (h *AgentCompatHandler) CancelTask(c echo.Context) error
- func (h *AgentCompatHandler) CreateTask(c echo.Context) error
- func (h *AgentCompatHandler) DeleteTask(c echo.Context) error
- func (h *AgentCompatHandler) RegisterRoutes(g *echo.Group)
- func (h *AgentCompatHandler) SendMessage(c echo.Context) error
- func (h *AgentCompatHandler) SubmitAnswer(c echo.Context) error
- type ApprovalMode
- type ArtifactRef
- type Baseline
- type BaselineFilter
- type BaselineSpec
- type Batch1ExecutionAssets
- type BatchTrajectoryCase
- type BatchTrajectoryCaseResult
- type BatchTrajectoryExecutor
- type BatchTrajectoryRunResult
- type BatchTrajectoryRunner
- type BatchTrajectoryRunnerConfig
- type CUABenchmarkCaseResult
- type CUABenchmarkReport
- type CUAOSWorldMacBenchmarkAssets
- type CanonicalSkillSourceState
- type CheckpointArtifact
- type CompareEvalRunRequest
- type ComparisonCaseDelta
- type ComparisonReport
- type Controller
- func (c *Controller) AppendEvent(ctx context.Context, event RunEvent) error
- func (c *Controller) AttachArtifact(ctx context.Context, ref ArtifactRef) error
- func (c *Controller) BuildEvolutionOverview(ctx context.Context, skillID string, ownerUserID string) (*EvolutionOverview, error)
- func (c *Controller) BuildSkillEvolutionCaseDetail(ctx context.Context, evolutionCase *SkillEvolutionCase) (*SkillEvolutionCaseDetail, error)
- func (c *Controller) Cancel(ctx context.Context, id string, reason string) error
- func (c *Controller) CancelEvalRun(ctx context.Context, id string, reason string) error
- func (c *Controller) CancelGroup(ctx context.Context, id string, reason string) error
- func (c *Controller) CompareEvalRun(ctx context.Context, targetEvalRunID string, req CompareEvalRunRequest) (*ComparisonReport, error)
- func (c *Controller) CreateBaseline(ctx context.Context, spec BaselineSpec) (*Baseline, error)
- func (c *Controller) CreateDataset(ctx context.Context, spec DatasetSpec) (*Dataset, error)
- func (c *Controller) CreateDatasetVersion(ctx context.Context, datasetID string, spec DatasetVersionSpec) (*DatasetVersion, error)
- func (c *Controller) CreateEvalSpec(ctx context.Context, spec EvalSpecSpec) (*EvalSpec, error)
- func (c *Controller) CreateSkillEvolutionCase(ctx context.Context, evolutionCase SkillEvolutionCase) (*SkillEvolutionCase, error)
- func (c *Controller) CreateSkillRevision(ctx context.Context, revision SkillRevision) (*SkillRevision, error)
- func (c *Controller) Delete(ctx context.Context, id string) error
- func (c *Controller) EnsureBatch1ExecutionAssets(ctx context.Context, ownerUserID string) (*Batch1ExecutionAssets, error)
- func (c *Controller) EnsureDesktopChatSuccessRateAssets(ctx context.Context, ownerUserID string) (*DesktopChatSuccessRateAssets, error)
- func (c *Controller) EnsureSelectorCuratedAssets(ctx context.Context, ownerUserID string) (*SelectorCuratedAssets, error)
- func (c *Controller) EnsureSkillEvolutionCase(ctx context.Context, spec SkillEvolutionCaseSpec) (*SkillEvolutionCase, bool, error)
- func (c *Controller) EvaluateExecutionEquivalence(ctx context.Context, targetEvalRunID string, req ExecutionEquivalenceRequest) (*ExecutionEquivalenceReport, error)
- func (c *Controller) EvaluateSelectorGate(ctx context.Context, targetEvalRunID string, req SelectorGateRequest) (*SelectorGateReport, error)
- func (c *Controller) EvaluateSkillCutoverBudgetGate(ctx context.Context, targetEvalRunID string, req SkillCutoverBudgetRequest) (*SkillCutoverBudgetReport, error)
- func (c *Controller) EvaluateSkillCutoverReadiness(ctx context.Context, req SkillCutoverReadinessRequest) (*SkillCutoverReadinessReport, error)
- func (c *Controller) ExecutionMiddlewares() []ExecutionMiddleware
- func (c *Controller) FindRunByMetadata(ctx context.Context, kind RunKind, key string, value string) (*Run, error)
- func (c *Controller) Get(ctx context.Context, id string) (*Run, error)
- func (c *Controller) GetComparisonReport(ctx context.Context, id string) (*ComparisonReport, error)
- func (c *Controller) GetDataset(ctx context.Context, id string) (*Dataset, error)
- func (c *Controller) GetDatasetVersion(ctx context.Context, id string) (*DatasetVersion, error)
- func (c *Controller) GetEvalRun(ctx context.Context, id string) (*EvalRun, error)
- func (c *Controller) GetEvalRunReport(ctx context.Context, id string) (*EvalRunReport, error)
- func (c *Controller) GetEvalSpec(ctx context.Context, id string) (*EvalSpec, error)
- func (c *Controller) GetGroup(ctx context.Context, id string) (*RunGroup, error)
- func (c *Controller) GetGroupReport(ctx context.Context, id string) (*RunGroupReport, error)
- func (c *Controller) GetRegisteredDriver(kind RunKind) Driver
- func (c *Controller) GetSkillEvolutionCase(ctx context.Context, id string) (*SkillEvolutionCase, error)
- func (c *Controller) GetSkillEvolutionCaseDetail(ctx context.Context, id string) (*SkillEvolutionCaseDetail, error)
- func (c *Controller) GetSkillRevision(ctx context.Context, id string) (*SkillRevision, error)
- func (c *Controller) GetStored(ctx context.Context, id string) (*Run, error)
- func (c *Controller) ImportDatasetBundle(ctx context.Context, req ImportDatasetBundleRequest) (*ImportDatasetBundleResult, error)
- func (c *Controller) List(ctx context.Context, filter RunFilter) ([]Run, error)
- func (c *Controller) ListArtifacts(ctx context.Context, runID string) ([]ArtifactRef, error)
- func (c *Controller) ListBaselines(ctx context.Context, filter BaselineFilter) ([]Baseline, error)
- func (c *Controller) ListDatasetVersions(ctx context.Context, datasetID string, limit int) ([]DatasetVersion, error)
- func (c *Controller) ListDatasets(ctx context.Context, filter DatasetFilter) ([]Dataset, error)
- func (c *Controller) ListEvalRuns(ctx context.Context, filter EvalRunFilter) ([]EvalRun, error)
- func (c *Controller) ListEvalSpecs(ctx context.Context, filter EvalSpecFilter) ([]EvalSpec, error)
- func (c *Controller) ListEvents(ctx context.Context, runID string, limit int) ([]RunEvent, error)
- func (c *Controller) ListGroupItems(ctx context.Context, groupID string) ([]RunGroupItem, error)
- func (c *Controller) ListGroups(ctx context.Context, filter RunGroupFilter) ([]RunGroup, error)
- func (c *Controller) ListOne(ctx context.Context, id string) (*Run, error)
- func (c *Controller) ListSkillDecisionHistory(ctx context.Context, filter SkillDecisionHistoryFilter) ([]SkillDecisionHistoryEntry, error)
- func (c *Controller) ListSkillEvolutionCases(ctx context.Context, filter SkillEvolutionCaseFilter) ([]SkillEvolutionCase, error)
- func (c *Controller) ListSkillRevisions(ctx context.Context, filter SkillRevisionFilter) ([]SkillRevision, error)
- func (c *Controller) OptimizeSkill(ctx context.Context, skillID string, req SkillOptimizeRequest) (*OptimizationTrigger, error)
- func (c *Controller) PerformAction(ctx context.Context, id string, action string, input map[string]interface{}) (*Run, error)
- func (c *Controller) PerformGroupAction(ctx context.Context, id string, action string, input map[string]interface{}) (*RunGroup, error)
- func (c *Controller) PromoteGroup(ctx context.Context, groupID string, spec GroupPromotionSpec) (*GroupPromotionResult, error)
- func (c *Controller) PromoteSkillRevision(ctx context.Context, revisionID string, decision SkillRevisionDecisionRequest) (*SkillPromoteResult, error)
- func (c *Controller) RegisterDriver(driver Driver)
- func (c *Controller) RetryFailedGroup(ctx context.Context, id string) (int, error)
- func (c *Controller) RollbackSkillRevision(ctx context.Context, revisionID string, decision SkillRevisionDecisionRequest) (*SkillPromoteResult, error)
- func (c *Controller) RunTraceSnapshot(ctx context.Context, runID string) (*RunTrace, error)
- func (c *Controller) SetJudgeEvaluator(evaluator JudgeEvaluator)
- func (c *Controller) SetOptimizationTriggerer(triggerer OptimizationTriggerer)
- func (c *Controller) SetReflector(reflector ProposalReflector)
- func (c *Controller) SetRunTraceProvider(provider RunTraceProvider)
- func (c *Controller) SetRuntimeReflectionCoordinator(coordinator *RuntimeReflectionCoordinator)
- func (c *Controller) SpawnChild(ctx context.Context, parentID string, spec RunSpec) (*Run, error)
- func (c *Controller) Submit(ctx context.Context, spec RunSpec) (*Run, error)
- func (c *Controller) SubmitEvalRun(ctx context.Context, spec EvalRunSpec) (*EvalRun, error)
- func (c *Controller) SubmitGroup(ctx context.Context, spec RunGroupSpec) (*RunGroup, error)
- func (c *Controller) SyncSnapshot(ctx context.Context, snapshot *Run) error
- func (c *Controller) UpdateSkillEvolutionCase(ctx context.Context, evolutionCase SkillEvolutionCase) (*SkillEvolutionCase, error)
- func (c *Controller) UpdateSkillRevision(ctx context.Context, revision SkillRevision) (*SkillRevision, error)
- func (c *Controller) UseExecutionMiddleware(mw ExecutionMiddleware)
- type Dataset
- type DatasetBundleSourcePreview
- type DatasetBundleVersionPreview
- type DatasetFilter
- type DatasetManifest
- type DatasetManifestDefaults
- type DatasetManifestItem
- type DatasetManifestMeta
- type DatasetSpec
- type DatasetVersion
- type DatasetVersionSpec
- func Batch1ExecutionDatasetVersionSpec(createdBy string) (DatasetVersionSpec, error)
- func CUAOSWorldMacBenchmarkDatasetVersionSpec(createdBy string) (DatasetVersionSpec, error)
- func DesktopChatSuccessRateDatasetVersionSpec(createdBy string) (DatasetVersionSpec, error)
- func SelectorCuratedDatasetVersionSpec(createdBy string) (DatasetVersionSpec, error)
- type Defaults
- type DesktopChatSuccessRateAssets
- type DesktopChatSuccessRateGateCheck
- type DesktopChatSuccessRateGateReport
- type Driver
- type EvalRun
- type EvalRunFilter
- type EvalRunReport
- type EvalRunSpec
- type EvalSpec
- type EvalSpecFilter
- type EvalSpecSpec
- func Batch1ExecutionEvalSpecSpec(datasetID, datasetVersionID, ownerUserID string) EvalSpecSpec
- func CUAOSWorldMacBenchmarkEvalSpecSpec(datasetID, datasetVersionID, ownerUserID string) EvalSpecSpec
- func DesktopChatSuccessRateEvalSpecSpec(datasetID, datasetVersionID, ownerUserID string) EvalSpecSpec
- func SelectorCuratedEvalSpecSpec(datasetID, datasetVersionID, ownerUserID string) EvalSpecSpec
- type EvolutionOverview
- type EvolutionOverviewInstructionCounts
- type EvolutionOverviewRevisionCounts
- type EvolutionProposalSummaryProvider
- type ExecPathGuard
- type ExecutionEquivalenceCheck
- type ExecutionEquivalenceMetrics
- type ExecutionEquivalenceReport
- type ExecutionEquivalenceRequest
- type ExecutionEquivalenceSegmentMetrics
- type ExecutionEquivalenceThresholds
- type ExecutionMiddleware
- type ExecutionMiddlewareHooks
- type GroupDispatcher
- type GroupPromotionResult
- type GroupPromotionSpec
- type GroupSchedulerConfig
- type GroupScoringConfig
- type GuardPipelineError
- type Handler
- func (h *Handler) CancelEvalRun(c echo.Context) error
- func (h *Handler) CancelGroup(c echo.Context) error
- func (h *Handler) CancelRun(c echo.Context) error
- func (h *Handler) CompareEvalRun(c echo.Context) error
- func (h *Handler) CreateBaseline(c echo.Context) error
- func (h *Handler) CreateDataset(c echo.Context) error
- func (h *Handler) CreateDatasetVersion(c echo.Context) error
- func (h *Handler) CreateEvalRun(c echo.Context) error
- func (h *Handler) CreateEvalSpec(c echo.Context) error
- func (h *Handler) CreateGroup(c echo.Context) error
- func (h *Handler) CreateRun(c echo.Context) error
- func (h *Handler) EnsureBatch1ExecutionAssets(c echo.Context) error
- func (h *Handler) EnsureSelectorCuratedAssets(c echo.Context) error
- func (h *Handler) EvaluateExecutionEquivalence(c echo.Context) error
- func (h *Handler) EvaluateSelectorGate(c echo.Context) error
- func (h *Handler) EvaluateSkillCutoverBudgetGate(c echo.Context) error
- func (h *Handler) EvaluateSkillCutoverReadiness(c echo.Context) error
- func (h *Handler) GetComparisonReport(c echo.Context) error
- func (h *Handler) GetDataset(c echo.Context) error
- func (h *Handler) GetDatasetVersion(c echo.Context) error
- func (h *Handler) GetEvalRun(c echo.Context) error
- func (h *Handler) GetEvalRunReport(c echo.Context) error
- func (h *Handler) GetEvalSpec(c echo.Context) error
- func (h *Handler) GetEvolutionOverview(c echo.Context) error
- func (h *Handler) GetGroup(c echo.Context) error
- func (h *Handler) GetGroupReport(c echo.Context) error
- func (h *Handler) GetRun(c echo.Context) error
- func (h *Handler) GetRunDetail(c echo.Context) error
- func (h *Handler) GetSkillEvolutionCase(c echo.Context) error
- func (h *Handler) GetSkillRevision(c echo.Context) error
- func (h *Handler) ImportDatasetBundle(c echo.Context) error
- func (h *Handler) ImportDatasetBundleFromSource(c echo.Context) error
- func (h *Handler) ListArtifacts(c echo.Context) error
- func (h *Handler) ListBaselines(c echo.Context) error
- func (h *Handler) ListDatasetVersions(c echo.Context) error
- func (h *Handler) ListDatasets(c echo.Context) error
- func (h *Handler) ListEvalRuns(c echo.Context) error
- func (h *Handler) ListEvalSpecs(c echo.Context) error
- func (h *Handler) ListEvents(c echo.Context) error
- func (h *Handler) ListGroupItems(c echo.Context) error
- func (h *Handler) ListGroups(c echo.Context) error
- func (h *Handler) ListRuns(c echo.Context) error
- func (h *Handler) ListSkillDecisionHistory(c echo.Context) error
- func (h *Handler) ListSkillEvolutionCases(c echo.Context) error
- func (h *Handler) ListSkillRevisions(c echo.Context) error
- func (h *Handler) OptimizeSkill(c echo.Context) error
- func (h *Handler) PerformGroupAction(c echo.Context) error
- func (h *Handler) PerformRunAction(c echo.Context) error
- func (h *Handler) PreviewDatasetBundleFromSource(c echo.Context) error
- func (h *Handler) PromoteGroup(c echo.Context) error
- func (h *Handler) PromoteSkillRevision(c echo.Context) error
- func (h *Handler) RegisterRoutes(g *echo.Group)
- func (h *Handler) RetryFailedGroup(c echo.Context) error
- func (h *Handler) RollbackSkillRevision(c echo.Context) error
- func (h *Handler) SetDetailProvider(provider RunDetailProvider)
- func (h *Handler) SetEvolutionProposalSummaryProvider(provider EvolutionProposalSummaryProvider)
- type HarnessAPICheck
- type HarnessAdaptivePolicy
- type HarnessBrowserCheck
- type HarnessCheckpoint
- type HarnessContract
- type HarnessExpectedArtifact
- type HarnessSubagentExecutor
- type HarnessVerificationResult
- type ImportDatasetBundleEvalSpec
- type ImportDatasetBundleFromSourceRequest
- type ImportDatasetBundleRequest
- type ImportDatasetBundleResult
- type JudgeEvaluationRequest
- type JudgeEvaluationResult
- type JudgeEvaluator
- type LLMJudgeEvaluator
- type LegacyStoreMigrationResult
- type Manager
- type OptimizationReason
- type OptimizationSurface
- type OptimizationTrigger
- type OptimizationTriggerer
- type PolicyResolver
- type ProposalReflector
- type Run
- type RunActionAvailability
- type RunContext
- type RunDetail
- type RunDetailProvider
- type RunEnv
- type RunEvent
- type RunFilter
- type RunGroup
- type RunGroupFilter
- type RunGroupItem
- type RunGroupItemSpec
- type RunGroupItemStatus
- type RunGroupKind
- type RunGroupReport
- type RunGroupSpec
- type RunGroupStatus
- type RunKind
- type RunSpec
- type RunStatus
- type RunTrace
- type RunTraceCollector
- type RunTraceEvent
- type RunTraceProvider
- type RunTraceStage
- type RuntimeEvidenceEntry
- type RuntimeEvidenceProvider
- type RuntimeObserver
- func (o *RuntimeObserver) OnApprovalRequested(event tools.ApprovalRuntimeEvent)
- func (o *RuntimeObserver) OnApprovalResolved(event tools.ApprovalRuntimeEvent)
- func (o *RuntimeObserver) OnQuestionRequested(event tools.QuestionRuntimeEvent)
- func (o *RuntimeObserver) OnQuestionResolved(event tools.QuestionRuntimeEvent)
- func (o *RuntimeObserver) OnToolFinished(event tools.ToolRuntimeEvent)
- func (o *RuntimeObserver) OnToolRequested(event tools.ToolRuntimeEvent)
- type RuntimeReflectionCoordinator
- func (c *RuntimeReflectionCoordinator) OnApprovalRequested(event tools.ApprovalRuntimeEvent)
- func (c *RuntimeReflectionCoordinator) OnApprovalResolved(event tools.ApprovalRuntimeEvent)
- func (c *RuntimeReflectionCoordinator) OnQuestionRequested(event tools.QuestionRuntimeEvent)
- func (c *RuntimeReflectionCoordinator) OnQuestionResolved(event tools.QuestionRuntimeEvent)
- func (c *RuntimeReflectionCoordinator) OnRunTerminal(ctx context.Context, run *Run)
- func (c *RuntimeReflectionCoordinator) OnToolFinished(event tools.ToolRuntimeEvent)
- func (c *RuntimeReflectionCoordinator) OnToolRequested(event tools.ToolRuntimeEvent)
- type RuntimeStage
- type SQLiteStore
- func (s *SQLiteStore) AppendEvent(ctx context.Context, event RunEvent) error
- func (s *SQLiteStore) AttachArtifact(ctx context.Context, ref ArtifactRef) error
- func (s *SQLiteStore) AttachScorecard(ctx context.Context, scorecard Scorecard) error
- func (s *SQLiteStore) BuildEvolutionOverview(ctx context.Context, skillID string, _ string) (*EvolutionOverview, error)
- func (s *SQLiteStore) ClaimNextGroupItem(ctx context.Context, groupID, workerID string, leaseTTL time.Duration, ...) (*RunGroupItem, error)
- func (s *SQLiteStore) ClearDefaultBaseline(ctx context.Context, evalSpecID string) error
- func (s *SQLiteStore) CountGroupItemsByStatuses(ctx context.Context, groupID string, statuses []RunGroupItemStatus) (int, error)
- func (s *SQLiteStore) CreateBaseline(ctx context.Context, baseline *Baseline) error
- func (s *SQLiteStore) CreateComparisonReport(ctx context.Context, report *ComparisonReport) error
- func (s *SQLiteStore) CreateDataset(ctx context.Context, dataset *Dataset) error
- func (s *SQLiteStore) CreateDatasetVersion(ctx context.Context, version *DatasetVersion) error
- func (s *SQLiteStore) CreateEvalRun(ctx context.Context, evalRun *EvalRun) error
- func (s *SQLiteStore) CreateEvalSpec(ctx context.Context, spec *EvalSpec) error
- func (s *SQLiteStore) CreateGroup(ctx context.Context, group *RunGroup) error
- func (s *SQLiteStore) CreateGroupItems(ctx context.Context, items []RunGroupItem) error
- func (s *SQLiteStore) CreateRun(ctx context.Context, run *Run) error
- func (s *SQLiteStore) CreateSkillEvolutionCase(ctx context.Context, evolutionCase *SkillEvolutionCase) error
- func (s *SQLiteStore) CreateSkillRevision(ctx context.Context, revision *SkillRevision) error
- func (s *SQLiteStore) DeleteRun(ctx context.Context, id string) error
- func (s *SQLiteStore) FindDatasetByOwnerAndName(ctx context.Context, ownerUserID, name string) (*Dataset, error)
- func (s *SQLiteStore) FindDatasetVersionByDatasetAndVersion(ctx context.Context, datasetID, version string) (*DatasetVersion, error)
- func (s *SQLiteStore) FindEvalSpecByOwnerDatasetAndName(ctx context.Context, ownerUserID, datasetID, name string) (*EvalSpec, error)
- func (s *SQLiteStore) FindLatestSkillEvolutionCaseByDedupKey(ctx context.Context, skillID string, ownerUserID string, dedupKey string) (*SkillEvolutionCase, error)
- func (s *SQLiteStore) FindRunByMetadata(ctx context.Context, kind RunKind, key, value string) (*Run, error)
- func (s *SQLiteStore) GetBaseline(ctx context.Context, id string) (*Baseline, error)
- func (s *SQLiteStore) GetComparisonReport(ctx context.Context, id string) (*ComparisonReport, error)
- func (s *SQLiteStore) GetDataset(ctx context.Context, id string) (*Dataset, error)
- func (s *SQLiteStore) GetDatasetVersion(ctx context.Context, id string) (*DatasetVersion, error)
- func (s *SQLiteStore) GetEvalRun(ctx context.Context, id string) (*EvalRun, error)
- func (s *SQLiteStore) GetEvalSpec(ctx context.Context, id string) (*EvalSpec, error)
- func (s *SQLiteStore) GetGroup(ctx context.Context, id string) (*RunGroup, error)
- func (s *SQLiteStore) GetGroupItem(ctx context.Context, id string) (*RunGroupItem, error)
- func (s *SQLiteStore) GetRun(ctx context.Context, id string) (*Run, error)
- func (s *SQLiteStore) GetSkillEvolutionCase(ctx context.Context, id string) (*SkillEvolutionCase, error)
- func (s *SQLiteStore) GetSkillRevision(ctx context.Context, id string) (*SkillRevision, error)
- func (s *SQLiteStore) LatestScorecardForItem(ctx context.Context, groupItemID string) (*Scorecard, error)
- func (s *SQLiteStore) ListArtifacts(ctx context.Context, runID string) ([]ArtifactRef, error)
- func (s *SQLiteStore) ListBaselines(ctx context.Context, filter BaselineFilter) ([]Baseline, error)
- func (s *SQLiteStore) ListDatasetVersions(ctx context.Context, datasetID string, limit int) ([]DatasetVersion, error)
- func (s *SQLiteStore) ListDatasets(ctx context.Context, filter DatasetFilter) ([]Dataset, error)
- func (s *SQLiteStore) ListEvalRuns(ctx context.Context, filter EvalRunFilter) ([]EvalRun, error)
- func (s *SQLiteStore) ListEvalSpecs(ctx context.Context, filter EvalSpecFilter) ([]EvalSpec, error)
- func (s *SQLiteStore) ListEvents(ctx context.Context, runID string, limit int) ([]RunEvent, error)
- func (s *SQLiteStore) ListGroupItems(ctx context.Context, groupID string) ([]RunGroupItem, error)
- func (s *SQLiteStore) ListGroups(ctx context.Context, filter RunGroupFilter) ([]RunGroup, error)
- func (s *SQLiteStore) ListRuns(ctx context.Context, filter RunFilter) ([]Run, error)
- func (s *SQLiteStore) ListScorecards(ctx context.Context, groupID string) ([]Scorecard, error)
- func (s *SQLiteStore) ListSkillDecisionHistory(ctx context.Context, filter SkillDecisionHistoryFilter) ([]SkillDecisionHistoryEntry, error)
- func (s *SQLiteStore) ListSkillEvolutionCases(ctx context.Context, filter SkillEvolutionCaseFilter) ([]SkillEvolutionCase, error)
- func (s *SQLiteStore) ListSkillRevisions(ctx context.Context, filter SkillRevisionFilter) ([]SkillRevision, error)
- func (s *SQLiteStore) UpdateDataset(ctx context.Context, dataset *Dataset) error
- func (s *SQLiteStore) UpdateEvalRun(ctx context.Context, evalRun *EvalRun) error
- func (s *SQLiteStore) UpdateEvalSpec(ctx context.Context, spec *EvalSpec) error
- func (s *SQLiteStore) UpdateGroup(ctx context.Context, group *RunGroup) error
- func (s *SQLiteStore) UpdateGroupItem(ctx context.Context, item *RunGroupItem) error
- func (s *SQLiteStore) UpdateRun(ctx context.Context, run *Run) error
- func (s *SQLiteStore) UpdateRunIfMaterialStateMatches(ctx context.Context, expected *Run, next *Run) (bool, error)
- func (s *SQLiteStore) UpdateSkillEvolutionCase(ctx context.Context, evolutionCase *SkillEvolutionCase) error
- func (s *SQLiteStore) UpdateSkillRevision(ctx context.Context, revision *SkillRevision) error
- type ScoreVerdict
- type Scorecard
- type ScoringMode
- type SelectorCuratedAssets
- type SelectorGateCheck
- type SelectorGateMetrics
- type SelectorGateReport
- type SelectorGateRequest
- type SelectorGateSegmentMetrics
- type SelectorGateThresholds
- type SkillCutoverBudgetCheck
- type SkillCutoverBudgetMetrics
- type SkillCutoverBudgetReport
- type SkillCutoverBudgetRequest
- type SkillCutoverBudgetThresholds
- type SkillCutoverLaneAssessment
- type SkillCutoverLaneReadiness
- type SkillCutoverReadinessReport
- type SkillCutoverReadinessRequest
- type SkillDecisionHistoryEntry
- type SkillDecisionHistoryFilter
- type SkillEvolutionCase
- type SkillEvolutionCaseDetail
- type SkillEvolutionCaseFilter
- type SkillEvolutionCaseSpec
- type SkillEvolutionCaseStatus
- type SkillEvolutionMode
- type SkillEvolutionReason
- type SkillOptimizeRequest
- type SkillPromoteResult
- type SkillRevision
- type SkillRevisionDecisionAction
- type SkillRevisionDecisionRequest
- type SkillRevisionFilter
- type SkillRevisionPromotionRecorder
- type SkillRevisionStatus
- type SnapshotDriver
- type Store
- type UserTaskActions
- type UserTaskArtifact
- type UserTaskBlocker
- type UserTaskProjection
- type UserTaskProjectionFilter
- type UserTaskProjectionHandler
- func (h *UserTaskProjectionHandler) CancelTask(c echo.Context) error
- func (h *UserTaskProjectionHandler) GetTask(c echo.Context) error
- func (h *UserTaskProjectionHandler) PerformTaskAction(c echo.Context) error
- func (h *UserTaskProjectionHandler) RegisterRoutes(g *echo.Group)
- func (h *UserTaskProjectionHandler) ResumeTask(c echo.Context) error
- type UserTaskProjectionService
- type UserTaskResearchSource
- type UserTaskSubagentSummary
- type WritePathGuard
Constants ¶
const (
	CUAOSWorldMacBenchmarkDatasetName        = "computer-use-cua-osworld-macos"
	CUAOSWorldMacBenchmarkDatasetDescription = "OSWorld-style macOS computer-use benchmark profile for Go-native CUA mode."
	CUAOSWorldMacBenchmarkDatasetSubject     = "computer_use_cua_osworld_macos"
	CUAOSWorldMacBenchmarkDatasetVersion     = "computer-use-cua-osworld-macos-v1"
	CUAOSWorldMacBenchmarkEvalName           = "Computer-Use Go-Native CUA OSWorld-Style macOS Benchmark"
	CUAOSWorldMacBenchmarkProfile            = "computer_use_cua_osworld_macos"
)
const (
	DesktopChatSuccessRateDatasetName        = "computer-use-desktop-chat-macos"
	DesktopChatSuccessRateDatasetDescription = "Curated macOS desktop-chat success-rate dataset for computer_use task trajectories."
	DesktopChatSuccessRateDatasetSubject     = "computer_use_desktop_chat"
	DesktopChatSuccessRateDatasetVersion     = "computer-use-desktop-chat-macos-v1"
	DesktopChatSuccessRateEvalName           = "Computer-Use macOS Desktop Chat Success Rate"
)
const (
	Batch1ExecutionDatasetName        = "skill-exec-batch1"
	Batch1ExecutionDatasetDescription = "Curated batch-1 execution equivalence dataset for tool-to-skill migration."
	Batch1ExecutionDatasetSubject     = "skill_execution_batch1"
	Batch1ExecutionDatasetVersion     = "skill-exec-batch1-v6"
	Batch1ExecutionEvalName           = "Skill Execution Batch 1"
)
const (
	SelectorCuratedDatasetName        = "selector-curated"
	SelectorCuratedDatasetDescription = "Curated selector dry-run routing regression dataset."
	SelectorCuratedDatasetSubject     = "selector_dry_run"
	SelectorCuratedDatasetVersion     = "selector-curated-v4"
	SelectorCuratedEvalName           = "Selector Curated Dry Run"
)
const LegacyStoreDBFilename = "harness.db"
Variables ¶
This section is empty.
Functions ¶
func ApplyHarnessContractToExpected ¶
func ApplyHarnessContractToExpected(expected map[string]interface{}, contract HarnessContract) map[string]interface{}
func Batch1ExecutionCaseCount ¶
func Batch1ExecutionCaseCount() int
func BuildHarnessContractContext ¶
func BuildHarnessContractContext(contract HarnessContract) string
func DesktopChatSuccessRateCaseCount ¶
func DesktopChatSuccessRateCaseCount() int
func HarnessAdaptivePolicyMetadata ¶
func HarnessAdaptivePolicyMetadata(policy HarnessAdaptivePolicy) map[string]interface{}
func HarnessContractFallbackPlan ¶
func HarnessContractFallbackPlan(contract HarnessContract) []string
func HarnessContractMetadata ¶
func HarnessContractMetadata(contract HarnessContract) map[string]interface{}
func HarnessContractSuccessCriteria ¶
func HarnessContractSuccessCriteria(contract HarnessContract) []string
func LegacyStoreDBPath ¶
func LegacyStoreDBPath(dataDir string) string
LegacyStoreDBPath returns the legacy standalone harness database path.
func NewRuntimeEventObserverMux ¶
func NewRuntimeEventObserverMux(observers ...tools.RuntimeEventObserver) tools.RuntimeEventObserver
func SelectorCuratedCaseCount ¶
func SelectorCuratedCaseCount() int
SelectorCuratedCaseCount returns the stable case count for the built-in selector routing dataset.
func WithRunContext ¶
func WithRunContext(ctx context.Context, runCtx *RunContext) context.Context
WithRunContext annotates a context with the current run-scoped execution envelope.
Types ¶
type ActionDescriptor ¶
type ActionDriver ¶
type ActionInputDescriptor ¶
type ActionInputDescriptor struct {
Title string `json:"title,omitempty"`
Description string `json:"description,omitempty"`
SubmitLabel string `json:"submit_label,omitempty"`
Fields []ActionInputFieldDescriptor `json:"fields,omitempty"`
}
type ActionInputFieldDescriptor ¶
type ActionInputFieldDescriptor struct {
Key string `json:"key"`
Label string `json:"label"`
Kind string `json:"kind,omitempty"`
Target string `json:"target,omitempty"`
PayloadKey string `json:"payload_key,omitempty"`
Required bool `json:"required,omitempty"`
Placeholder string `json:"placeholder,omitempty"`
Options []string `json:"options,omitempty"`
}
type AgentCompatHandler ¶
type AgentCompatHandler struct {
// contains filtered or unexported fields
}
func NewAgentCompatHandler ¶
func NewAgentCompatHandler(manager *Controller, store *agentpkg.Store, runner *agentpkg.Runner, defaultWorkspaceRoot string) *AgentCompatHandler
func (*AgentCompatHandler) CancelTask ¶
func (h *AgentCompatHandler) CancelTask(c echo.Context) error
func (*AgentCompatHandler) CreateTask ¶
func (h *AgentCompatHandler) CreateTask(c echo.Context) error
func (*AgentCompatHandler) DeleteTask ¶
func (h *AgentCompatHandler) DeleteTask(c echo.Context) error
func (*AgentCompatHandler) RegisterRoutes ¶
func (h *AgentCompatHandler) RegisterRoutes(g *echo.Group)
func (*AgentCompatHandler) SendMessage ¶
func (h *AgentCompatHandler) SendMessage(c echo.Context) error
func (*AgentCompatHandler) SubmitAnswer ¶
func (h *AgentCompatHandler) SubmitAnswer(c echo.Context) error
type ApprovalMode ¶
type ApprovalMode string
const (
	ApprovalModeAsk   ApprovalMode = "ask"
	ApprovalModeAllow ApprovalMode = "allow"
	ApprovalModeDeny  ApprovalMode = "deny"
)
type ArtifactRef ¶
type ArtifactRef struct {
ID string `json:"id"`
RunID string `json:"run_id"`
Kind string `json:"kind"`
Label string `json:"label,omitempty"`
PathOrURL string `json:"path_or_url,omitempty"`
MIMEType string `json:"mime_type,omitempty"`
SizeBytes int64 `json:"size_bytes,omitempty"`
MetadataJSON string `json:"metadata_json,omitempty"`
}
type Baseline ¶
type Baseline struct {
ID string `json:"id"`
Name string `json:"name"`
Subject string `json:"subject,omitempty"`
OwnerUserID string `json:"owner_user_id,omitempty"`
EvalSpecID string `json:"eval_spec_id"`
EvalRunID string `json:"eval_run_id"`
IsDefault bool `json:"is_default"`
Metadata map[string]interface{} `json:"metadata,omitempty"`
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
}
type BaselineFilter ¶
type BaselineSpec ¶
type BaselineSpec struct {
Name string `json:"name"`
Subject string `json:"subject,omitempty"`
OwnerUserID string `json:"owner_user_id,omitempty"`
EvalSpecID string `json:"eval_spec_id,omitempty"`
EvalRunID string `json:"eval_run_id"`
IsDefault bool `json:"is_default,omitempty"`
Metadata map[string]interface{} `json:"metadata,omitempty"`
}
type Batch1ExecutionAssets ¶
type Batch1ExecutionAssets struct {
Dataset *Dataset `json:"dataset,omitempty"`
DatasetVersion *DatasetVersion `json:"dataset_version,omitempty"`
EvalSpec *EvalSpec `json:"eval_spec,omitempty"`
}
type BatchTrajectoryCase ¶
type BatchTrajectoryCaseResult ¶
type BatchTrajectoryCaseResult struct {
Input map[string]interface{} `json:"input,omitempty"`
Expected map[string]interface{} `json:"expected,omitempty"`
Metadata map[string]interface{} `json:"metadata,omitempty"`
TraceSummary string `json:"trace_summary,omitempty"`
Provenance map[string]interface{} `json:"provenance,omitempty"`
}
type BatchTrajectoryExecutor ¶
// BatchTrajectoryExecutor is the single-method dependency passed to
// NewBatchTrajectoryRunner.
type BatchTrajectoryExecutor interface {
// ExecuteBatchTrajectoryCase takes one BatchTrajectoryCase and returns
// its result or an error.
ExecuteBatchTrajectoryCase(ctx context.Context, c BatchTrajectoryCase) (*BatchTrajectoryCaseResult, error)
}
type BatchTrajectoryRunResult ¶
// BatchTrajectoryRunResult is the value returned by BatchTrajectoryRunner.Run.
type BatchTrajectoryRunResult struct {
// Manifest and DatasetVersionSpec are struct values without omitempty and
// are always emitted.
Manifest DatasetManifest `json:"manifest"`
DatasetVersionSpec DatasetVersionSpec `json:"dataset_version_spec"`
// The key lists are omitted from JSON when empty.
CompletedCaseKeys []string `json:"completed_case_keys,omitempty"`
FailedCaseKeys []string `json:"failed_case_keys,omitempty"`
CheckpointPath string `json:"checkpoint_path,omitempty"`
}
type BatchTrajectoryRunner ¶
// BatchTrajectoryRunner exposes no exported fields. It is built with
// NewBatchTrajectoryRunner(cfg, executor) and driven through its Run method.
type BatchTrajectoryRunner struct {
// contains filtered or unexported fields
}
func NewBatchTrajectoryRunner ¶
func NewBatchTrajectoryRunner(cfg BatchTrajectoryRunnerConfig, executor BatchTrajectoryExecutor) *BatchTrajectoryRunner
func (*BatchTrajectoryRunner) Run ¶
func (r *BatchTrajectoryRunner) Run(ctx context.Context, cases []BatchTrajectoryCase) (*BatchTrajectoryRunResult, error)
type BatchTrajectoryRunnerConfig ¶
// BatchTrajectoryRunnerConfig is the configuration value passed to
// NewBatchTrajectoryRunner.
type BatchTrajectoryRunnerConfig struct {
// DatasetName, DatasetSubject, DatasetVersion and CheckpointPath have no
// omitempty and are always emitted.
DatasetName string `json:"dataset_name"`
DatasetSubject string `json:"dataset_subject"`
DatasetVersion string `json:"dataset_version"`
SourceType string `json:"source_type,omitempty"`
SourceRef string `json:"source_ref,omitempty"`
CreatedBy string `json:"created_by,omitempty"`
CandidateID string `json:"candidate_id,omitempty"`
CheckpointPath string `json:"checkpoint_path"`
DefaultRunKind RunKind `json:"default_run_kind,omitempty"`
DefaultProfile string `json:"default_profile,omitempty"`
// NOTE(review): GroupScoringConfig is a struct, and encoding/json never
// treats a struct value as empty, so omitempty has no effect on Scoring.
// The same holds for Scheduler if GroupSchedulerConfig is a struct (its
// declaration is not shown on this page).
Scheduler GroupSchedulerConfig `json:"scheduler,omitempty"`
Scoring GroupScoringConfig `json:"scoring,omitempty"`
RuntimePolicy map[string]interface{} `json:"runtime_policy,omitempty"`
MaxRetries int `json:"max_retries,omitempty"`
}
type CUABenchmarkCaseResult ¶
// CUABenchmarkCaseResult is one per-case record. A slice of these is the
// input to EvaluateCUABenchmarkReport, and
// CUAOSWorldMacBenchmarkCaseResultFromSummary builds one from an id and a
// summary map.
type CUABenchmarkCaseResult struct {
ID string `json:"id,omitempty"`
// Passed is the only key without omitempty; it is emitted even when false.
Passed bool `json:"passed"`
// A false Critical or VerificationBacked, and a zero LatencyMS, are dropped
// from the JSON output by omitempty.
Critical bool `json:"critical,omitempty"`
VerificationBacked bool `json:"verification_backed,omitempty"`
FailureLabel string `json:"failure_label,omitempty"`
LatencyMS int `json:"latency_ms,omitempty"`
}
func CUAOSWorldMacBenchmarkCaseResultFromSummary ¶
func CUAOSWorldMacBenchmarkCaseResultFromSummary(id string, summary map[string]interface{}) CUABenchmarkCaseResult
type CUABenchmarkReport ¶
// CUABenchmarkReport is the aggregate returned by EvaluateCUABenchmarkReport.
// Every counter and rate is emitted even when zero; only FailureLabelCounts
// uses omitempty.
type CUABenchmarkReport struct {
CaseCount int `json:"case_count"`
PassedCount int `json:"passed_count"`
CriticalCount int `json:"critical_count"`
CriticalPassedCount int `json:"critical_passed_count"`
PassRate float64 `json:"pass_rate"`
CriticalPassRate float64 `json:"critical_pass_rate"`
UnsafeSendCount int `json:"unsafe_send_count"`
TypedBodyIntoSearchFieldCount int `json:"typed_body_into_search_field_count"`
VerificationBackedRate float64 `json:"verification_backed_rate"`
LatencyP50MS int `json:"latency_p50_ms"`
LatencyP90MS int `json:"latency_p90_ms"`
// Presumably keyed by CUABenchmarkCaseResult.FailureLabel — TODO confirm.
FailureLabelCounts map[string]int `json:"failure_label_counts,omitempty"`
}
func EvaluateCUABenchmarkReport ¶
func EvaluateCUABenchmarkReport(results []CUABenchmarkCaseResult) CUABenchmarkReport
type CUAOSWorldMacBenchmarkAssets ¶
// CUAOSWorldMacBenchmarkAssets bundles a dataset, a dataset version and an
// eval spec, the same three members as Batch1ExecutionAssets. All three are
// pointers with omitempty, so a nil member is left out of the JSON output.
type CUAOSWorldMacBenchmarkAssets struct {
Dataset *Dataset `json:"dataset,omitempty"`
DatasetVersion *DatasetVersion `json:"dataset_version,omitempty"`
EvalSpec *EvalSpec `json:"eval_spec,omitempty"`
}
type CanonicalSkillSourceState ¶
// CanonicalSkillSourceState is returned by both
// ResolveCanonicalSkillSourceState and ResolveWritableSkillSourceState.
// Its fields have no JSON tags, so encoding/json would use the Go field names
// as keys if the value were ever marshalled.
type CanonicalSkillSourceState struct {
AbsolutePath string
NormalizedPath string
Content string
// Presumably the hex SHA-256 digest of Content — TODO confirm the encoding.
ContentSHA256 string
}
func ResolveCanonicalSkillSourceState ¶
func ResolveCanonicalSkillSourceState(skillID string, sourcePath string) (*CanonicalSkillSourceState, error)
func ResolveWritableSkillSourceState ¶
func ResolveWritableSkillSourceState(skillID string, sourcePath string) (*CanonicalSkillSourceState, error)
type CheckpointArtifact ¶
// CheckpointArtifact pairs an ArtifactRef with an optional payload map.
type CheckpointArtifact struct {
// NOTE(review): the nested ArtifactRef also carries a RunID; nothing shown
// here says whether the two must agree — confirm.
RunID string `json:"run_id"`
GroupItemID string `json:"group_item_id,omitempty"`
// Artifact is held by value and has no omitempty, so it is always emitted.
Artifact ArtifactRef `json:"artifact"`
Payload map[string]interface{} `json:"payload,omitempty"`
}
type CompareEvalRunRequest ¶
type ComparisonCaseDelta ¶
// ComparisonCaseDelta is the element type of ComparisonReport.Regressions and
// ComparisonReport.Improvements. Most attributes come as a Base*/Target* pair.
type ComparisonCaseDelta struct {
// Key and ItemIndex are the only keys without omitempty.
Key string `json:"key"`
Label string `json:"label,omitempty"`
ItemIndex int `json:"item_index"`
Profile string `json:"profile,omitempty"`
BaseVerdict string `json:"base_verdict,omitempty"`
TargetVerdict string `json:"target_verdict,omitempty"`
BaseStatus string `json:"base_status,omitempty"`
TargetStatus string `json:"target_status,omitempty"`
// NOTE(review): the float64 score fields use omitempty, so a score or delta
// of exactly 0 is dropped from the JSON output. Consumers cannot tell
// "zero" from "absent".
BaseScore float64 `json:"base_score,omitempty"`
TargetScore float64 `json:"target_score,omitempty"`
DeltaScore float64 `json:"delta_score,omitempty"`
BaseRunID string `json:"base_run_id,omitempty"`
TargetRunID string `json:"target_run_id,omitempty"`
BaseReason string `json:"base_reason,omitempty"`
TargetReason string `json:"target_reason,omitempty"`
BaseFailureLabel string `json:"base_failure_label,omitempty"`
TargetFailureLabel string `json:"target_failure_label,omitempty"`
BaseVerification string `json:"base_verification,omitempty"`
TargetVerification string `json:"target_verification,omitempty"`
BaseEvidenceScore float64 `json:"base_evidence_score,omitempty"`
TargetEvidenceScore float64 `json:"target_evidence_score,omitempty"`
}
type ComparisonReport ¶
// ComparisonReport is returned by Controller.CompareEvalRun and
// Controller.GetComparisonReport. It names a base and a target eval run and
// lists per-case deltas.
type ComparisonReport struct {
ID string `json:"id"`
OwnerUserID string `json:"owner_user_id,omitempty"`
BaselineID string `json:"baseline_id,omitempty"`
EvalSpecID string `json:"eval_spec_id"`
BaseEvalRunID string `json:"base_eval_run_id"`
TargetEvalRunID string `json:"target_eval_run_id"`
Summary map[string]interface{} `json:"summary,omitempty"`
// Empty Regressions / Improvements slices are omitted from the JSON output
// rather than encoded as [].
Regressions []ComparisonCaseDelta `json:"regressions,omitempty"`
Improvements []ComparisonCaseDelta `json:"improvements,omitempty"`
ScorerDelta map[string]interface{} `json:"scorer_delta,omitempty"`
CreatedAt time.Time `json:"created_at"`
}
type Controller ¶
// Controller exposes no exported fields. It is built with
// NewController(store, resolver); its exported behavior is the method set
// listed after this declaration.
type Controller struct {
// contains filtered or unexported fields
}
func NewController ¶
func NewController(store *SQLiteStore, resolver *PolicyResolver) *Controller
func (*Controller) AppendEvent ¶
func (c *Controller) AppendEvent(ctx context.Context, event RunEvent) error
func (*Controller) AttachArtifact ¶
func (c *Controller) AttachArtifact(ctx context.Context, ref ArtifactRef) error
func (*Controller) BuildEvolutionOverview ¶
func (c *Controller) BuildEvolutionOverview(ctx context.Context, skillID string, ownerUserID string) (*EvolutionOverview, error)
func (*Controller) BuildSkillEvolutionCaseDetail ¶
func (c *Controller) BuildSkillEvolutionCaseDetail(ctx context.Context, evolutionCase *SkillEvolutionCase) (*SkillEvolutionCaseDetail, error)
func (*Controller) CancelEvalRun ¶
func (*Controller) CancelGroup ¶
func (*Controller) CompareEvalRun ¶
func (c *Controller) CompareEvalRun(ctx context.Context, targetEvalRunID string, req CompareEvalRunRequest) (*ComparisonReport, error)
func (*Controller) CreateBaseline ¶
func (c *Controller) CreateBaseline(ctx context.Context, spec BaselineSpec) (*Baseline, error)
func (*Controller) CreateDataset ¶
func (c *Controller) CreateDataset(ctx context.Context, spec DatasetSpec) (*Dataset, error)
func (*Controller) CreateDatasetVersion ¶
func (c *Controller) CreateDatasetVersion(ctx context.Context, datasetID string, spec DatasetVersionSpec) (*DatasetVersion, error)
func (*Controller) CreateEvalSpec ¶
func (c *Controller) CreateEvalSpec(ctx context.Context, spec EvalSpecSpec) (*EvalSpec, error)
func (*Controller) CreateSkillEvolutionCase ¶
func (c *Controller) CreateSkillEvolutionCase(ctx context.Context, evolutionCase SkillEvolutionCase) (*SkillEvolutionCase, error)
func (*Controller) CreateSkillRevision ¶
func (c *Controller) CreateSkillRevision(ctx context.Context, revision SkillRevision) (*SkillRevision, error)
func (*Controller) EnsureBatch1ExecutionAssets ¶
func (c *Controller) EnsureBatch1ExecutionAssets(ctx context.Context, ownerUserID string) (*Batch1ExecutionAssets, error)
func (*Controller) EnsureDesktopChatSuccessRateAssets ¶
func (c *Controller) EnsureDesktopChatSuccessRateAssets(ctx context.Context, ownerUserID string) (*DesktopChatSuccessRateAssets, error)
func (*Controller) EnsureSelectorCuratedAssets ¶
func (c *Controller) EnsureSelectorCuratedAssets(ctx context.Context, ownerUserID string) (*SelectorCuratedAssets, error)
func (*Controller) EnsureSkillEvolutionCase ¶
func (c *Controller) EnsureSkillEvolutionCase(ctx context.Context, spec SkillEvolutionCaseSpec) (*SkillEvolutionCase, bool, error)
func (*Controller) EvaluateExecutionEquivalence ¶
func (c *Controller) EvaluateExecutionEquivalence(ctx context.Context, targetEvalRunID string, req ExecutionEquivalenceRequest) (*ExecutionEquivalenceReport, error)
func (*Controller) EvaluateSelectorGate ¶
func (c *Controller) EvaluateSelectorGate(ctx context.Context, targetEvalRunID string, req SelectorGateRequest) (*SelectorGateReport, error)
func (*Controller) EvaluateSkillCutoverBudgetGate ¶
func (c *Controller) EvaluateSkillCutoverBudgetGate(ctx context.Context, targetEvalRunID string, req SkillCutoverBudgetRequest) (*SkillCutoverBudgetReport, error)
func (*Controller) EvaluateSkillCutoverReadiness ¶
func (c *Controller) EvaluateSkillCutoverReadiness(ctx context.Context, req SkillCutoverReadinessRequest) (*SkillCutoverReadinessReport, error)
func (*Controller) ExecutionMiddlewares ¶
func (c *Controller) ExecutionMiddlewares() []ExecutionMiddleware
func (*Controller) FindRunByMetadata ¶
func (*Controller) GetComparisonReport ¶
func (c *Controller) GetComparisonReport(ctx context.Context, id string) (*ComparisonReport, error)
func (*Controller) GetDataset ¶
func (*Controller) GetDatasetVersion ¶
func (c *Controller) GetDatasetVersion(ctx context.Context, id string) (*DatasetVersion, error)
func (*Controller) GetEvalRun ¶
func (*Controller) GetEvalRunReport ¶
func (c *Controller) GetEvalRunReport(ctx context.Context, id string) (*EvalRunReport, error)
func (*Controller) GetEvalSpec ¶
func (*Controller) GetGroupReport ¶
func (c *Controller) GetGroupReport(ctx context.Context, id string) (*RunGroupReport, error)
func (*Controller) GetRegisteredDriver ¶
func (c *Controller) GetRegisteredDriver(kind RunKind) Driver
func (*Controller) GetSkillEvolutionCase ¶
func (c *Controller) GetSkillEvolutionCase(ctx context.Context, id string) (*SkillEvolutionCase, error)
func (*Controller) GetSkillEvolutionCaseDetail ¶
func (c *Controller) GetSkillEvolutionCaseDetail(ctx context.Context, id string) (*SkillEvolutionCaseDetail, error)
func (*Controller) GetSkillRevision ¶
func (c *Controller) GetSkillRevision(ctx context.Context, id string) (*SkillRevision, error)
func (*Controller) ImportDatasetBundle ¶
func (c *Controller) ImportDatasetBundle(ctx context.Context, req ImportDatasetBundleRequest) (*ImportDatasetBundleResult, error)
func (*Controller) ListArtifacts ¶
func (c *Controller) ListArtifacts(ctx context.Context, runID string) ([]ArtifactRef, error)
func (*Controller) ListBaselines ¶
func (c *Controller) ListBaselines(ctx context.Context, filter BaselineFilter) ([]Baseline, error)
func (*Controller) ListDatasetVersions ¶
func (c *Controller) ListDatasetVersions(ctx context.Context, datasetID string, limit int) ([]DatasetVersion, error)
func (*Controller) ListDatasets ¶
func (c *Controller) ListDatasets(ctx context.Context, filter DatasetFilter) ([]Dataset, error)
func (*Controller) ListEvalRuns ¶
func (c *Controller) ListEvalRuns(ctx context.Context, filter EvalRunFilter) ([]EvalRun, error)
func (*Controller) ListEvalSpecs ¶
func (c *Controller) ListEvalSpecs(ctx context.Context, filter EvalSpecFilter) ([]EvalSpec, error)
func (*Controller) ListEvents ¶
func (*Controller) ListGroupItems ¶
func (c *Controller) ListGroupItems(ctx context.Context, groupID string) ([]RunGroupItem, error)
func (*Controller) ListGroups ¶
func (c *Controller) ListGroups(ctx context.Context, filter RunGroupFilter) ([]RunGroup, error)
func (*Controller) ListSkillDecisionHistory ¶
func (c *Controller) ListSkillDecisionHistory(ctx context.Context, filter SkillDecisionHistoryFilter) ([]SkillDecisionHistoryEntry, error)
func (*Controller) ListSkillEvolutionCases ¶
func (c *Controller) ListSkillEvolutionCases(ctx context.Context, filter SkillEvolutionCaseFilter) ([]SkillEvolutionCase, error)
func (*Controller) ListSkillRevisions ¶
func (c *Controller) ListSkillRevisions(ctx context.Context, filter SkillRevisionFilter) ([]SkillRevision, error)
func (*Controller) OptimizeSkill ¶
func (c *Controller) OptimizeSkill(ctx context.Context, skillID string, req SkillOptimizeRequest) (*OptimizationTrigger, error)
func (*Controller) PerformAction ¶
func (*Controller) PerformGroupAction ¶
func (*Controller) PromoteGroup ¶
func (c *Controller) PromoteGroup(ctx context.Context, groupID string, spec GroupPromotionSpec) (*GroupPromotionResult, error)
func (*Controller) PromoteSkillRevision ¶
func (c *Controller) PromoteSkillRevision(ctx context.Context, revisionID string, decision SkillRevisionDecisionRequest) (*SkillPromoteResult, error)
func (*Controller) RegisterDriver ¶
func (c *Controller) RegisterDriver(driver Driver)
func (*Controller) RetryFailedGroup ¶
func (*Controller) RollbackSkillRevision ¶
func (c *Controller) RollbackSkillRevision(ctx context.Context, revisionID string, decision SkillRevisionDecisionRequest) (*SkillPromoteResult, error)
func (*Controller) RunTraceSnapshot ¶
func (*Controller) SetJudgeEvaluator ¶
func (c *Controller) SetJudgeEvaluator(evaluator JudgeEvaluator)
func (*Controller) SetOptimizationTriggerer ¶
func (c *Controller) SetOptimizationTriggerer(triggerer OptimizationTriggerer)
func (*Controller) SetReflector ¶
func (c *Controller) SetReflector(reflector ProposalReflector)
func (*Controller) SetRunTraceProvider ¶
func (c *Controller) SetRunTraceProvider(provider RunTraceProvider)
func (*Controller) SetRuntimeReflectionCoordinator ¶
func (c *Controller) SetRuntimeReflectionCoordinator(coordinator *RuntimeReflectionCoordinator)
func (*Controller) SpawnChild ¶
func (*Controller) SubmitEvalRun ¶
func (c *Controller) SubmitEvalRun(ctx context.Context, spec EvalRunSpec) (*EvalRun, error)
func (*Controller) SubmitGroup ¶
func (c *Controller) SubmitGroup(ctx context.Context, spec RunGroupSpec) (*RunGroup, error)
func (*Controller) SyncSnapshot ¶
func (c *Controller) SyncSnapshot(ctx context.Context, snapshot *Run) error
func (*Controller) UpdateSkillEvolutionCase ¶
func (c *Controller) UpdateSkillEvolutionCase(ctx context.Context, evolutionCase SkillEvolutionCase) (*SkillEvolutionCase, error)
func (*Controller) UpdateSkillRevision ¶
func (c *Controller) UpdateSkillRevision(ctx context.Context, revision SkillRevision) (*SkillRevision, error)
func (*Controller) UseExecutionMiddleware ¶
func (c *Controller) UseExecutionMiddleware(mw ExecutionMiddleware)
type Dataset ¶
// Dataset is the record returned by Controller.CreateDataset and
// Controller.ListDatasets. It is also a member of the *Assets bundles and of
// GroupPromotionResult.
type Dataset struct {
ID string `json:"id"`
Name string `json:"name"`
Description string `json:"description,omitempty"`
OwnerUserID string `json:"owner_user_id,omitempty"`
Subject string `json:"subject,omitempty"`
DefaultRunKind RunKind `json:"default_run_kind,omitempty"`
DefaultProfile string `json:"default_profile,omitempty"`
// Presumably the ID of a DatasetVersion — TODO confirm.
ActiveVersionID string `json:"active_version_id,omitempty"`
Metadata map[string]interface{} `json:"metadata,omitempty"`
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
}
type DatasetBundleSourcePreview ¶
// DatasetBundleSourcePreview describes a dataset bundle source: a dataset
// spec, a version preview and optional eval specs.
// DatasetBundleVersionPreview and ImportDatasetBundleEvalSpec are declared
// elsewhere in the package.
type DatasetBundleSourcePreview struct {
SourceType string `json:"source_type,omitempty"`
SourceRef string `json:"source_ref,omitempty"`
// Dataset and Version are held by value without omitempty and are always
// emitted.
Dataset DatasetSpec `json:"dataset"`
Version DatasetBundleVersionPreview `json:"version"`
EvalSpecs []ImportDatasetBundleEvalSpec `json:"eval_specs,omitempty"`
MakeActive bool `json:"make_active,omitempty"`
}
type DatasetFilter ¶
type DatasetManifest ¶
// DatasetManifest is the typed manifest: dataset metadata, defaults and a list
// of items. The built-in *Manifest() constructors return it by value, and
// DatasetVersion.DecodeManifest returns a pointer to one.
type DatasetManifest struct {
// NOTE(review): DatasetManifestDefaults is a struct, and encoding/json
// never omits a struct value, so omitempty has no effect on Defaults. The
// same holds for Dataset if DatasetManifestMeta is a struct (declaration
// not shown on this page).
Dataset DatasetManifestMeta `json:"dataset,omitempty"`
Defaults DatasetManifestDefaults `json:"defaults,omitempty"`
Items []DatasetManifestItem `json:"items,omitempty"`
}
func Batch1ExecutionDatasetManifest ¶
func Batch1ExecutionDatasetManifest() DatasetManifest
func CUAOSWorldMacBenchmarkManifest ¶
func CUAOSWorldMacBenchmarkManifest() DatasetManifest
func DesktopChatSuccessRateDatasetManifest ¶
func DesktopChatSuccessRateDatasetManifest() DatasetManifest
func SelectorCuratedDatasetManifest ¶
func SelectorCuratedDatasetManifest() DatasetManifest
SelectorCuratedDatasetManifest returns the built-in selector routing dataset that freezes curated dry-run expectations into a reusable Harness manifest.
type DatasetManifestDefaults ¶
// DatasetManifestDefaults is the type of DatasetManifest.Defaults.
// RunKind and Profile also appear on DatasetManifestItem; presumably the
// values here apply to items that leave them unset — TODO confirm.
type DatasetManifestDefaults struct {
RunKind RunKind `json:"run_kind,omitempty"`
Profile string `json:"profile,omitempty"`
// NOTE(review): GroupScoringConfig is a struct, so omitempty has no effect
// on Scoring under encoding/json.
Scheduler GroupSchedulerConfig `json:"scheduler,omitempty"`
Scoring GroupScoringConfig `json:"scoring,omitempty"`
RuntimePolicy map[string]interface{} `json:"runtime_policy,omitempty"`
}
type DatasetManifestItem ¶
// DatasetManifestItem is the element type of DatasetManifest.Items. Every
// field uses omitempty, so a zero-valued item encodes as {}.
type DatasetManifestItem struct {
ID string `json:"id,omitempty"`
RunKind RunKind `json:"run_kind,omitempty"`
Profile string `json:"profile,omitempty"`
Input map[string]interface{} `json:"input,omitempty"`
Expected map[string]interface{} `json:"expected,omitempty"`
Metadata map[string]interface{} `json:"metadata,omitempty"`
}
type DatasetManifestMeta ¶
type DatasetSpec ¶
// DatasetSpec is the input accepted by Controller.CreateDataset and the value
// returned by the built-in *DatasetSpec(ownerUserID) constructors. Its fields
// are the subset of Dataset without ID, ActiveVersionID and the timestamps.
type DatasetSpec struct {
// Name is the only key without omitempty.
Name string `json:"name"`
Description string `json:"description,omitempty"`
OwnerUserID string `json:"owner_user_id,omitempty"`
Subject string `json:"subject,omitempty"`
DefaultRunKind RunKind `json:"default_run_kind,omitempty"`
DefaultProfile string `json:"default_profile,omitempty"`
Metadata map[string]interface{} `json:"metadata,omitempty"`
}
func Batch1ExecutionDatasetSpec ¶
func Batch1ExecutionDatasetSpec(ownerUserID string) DatasetSpec
func CUAOSWorldMacBenchmarkDatasetSpec ¶
func CUAOSWorldMacBenchmarkDatasetSpec(ownerUserID string) DatasetSpec
func DesktopChatSuccessRateDatasetSpec ¶
func DesktopChatSuccessRateDatasetSpec(ownerUserID string) DatasetSpec
func SelectorCuratedDatasetSpec ¶
func SelectorCuratedDatasetSpec(ownerUserID string) DatasetSpec
SelectorCuratedDatasetSpec provides the reusable dataset shell for the built-in selector routing manifest.
type DatasetVersion ¶
// DatasetVersion is the record returned by Controller.CreateDatasetVersion,
// GetDatasetVersion and ListDatasetVersions.
type DatasetVersion struct {
ID string `json:"id"`
DatasetID string `json:"dataset_id"`
Version string `json:"version"`
// Presumably the SHA-256 of the serialized manifest — TODO confirm what
// exactly is hashed.
ManifestSHA256 string `json:"manifest_sha256,omitempty"`
// ItemCount is always emitted, including when 0.
ItemCount int `json:"item_count"`
SourceType string `json:"source_type,omitempty"`
SourceRef string `json:"source_ref,omitempty"`
// Manifest is held as an untyped map; the DecodeManifest method returns the
// typed *DatasetManifest form.
Manifest map[string]interface{} `json:"manifest,omitempty"`
Metadata map[string]interface{} `json:"metadata,omitempty"`
CreatedBy string `json:"created_by,omitempty"`
CreatedAt time.Time `json:"created_at"`
}
func (*DatasetVersion) DecodeManifest ¶
func (v *DatasetVersion) DecodeManifest() (*DatasetManifest, error)
type DatasetVersionSpec ¶
// DatasetVersionSpec is the input accepted by Controller.CreateDatasetVersion
// and the value produced by the built-in *DatasetVersionSpec(createdBy)
// constructors. It is also embedded by value in BatchTrajectoryRunResult.
type DatasetVersionSpec struct {
Version string `json:"version,omitempty"`
SourceType string `json:"source_type,omitempty"`
SourceRef string `json:"source_ref,omitempty"`
// Manifest has no omitempty: a nil map encodes as JSON null rather than
// being omitted.
Manifest map[string]interface{} `json:"manifest"`
Metadata map[string]interface{} `json:"metadata,omitempty"`
CreatedBy string `json:"created_by,omitempty"`
}
func Batch1ExecutionDatasetVersionSpec ¶
func Batch1ExecutionDatasetVersionSpec(createdBy string) (DatasetVersionSpec, error)
func CUAOSWorldMacBenchmarkDatasetVersionSpec ¶
func CUAOSWorldMacBenchmarkDatasetVersionSpec(createdBy string) (DatasetVersionSpec, error)
func DesktopChatSuccessRateDatasetVersionSpec ¶
func DesktopChatSuccessRateDatasetVersionSpec(createdBy string) (DatasetVersionSpec, error)
func SelectorCuratedDatasetVersionSpec ¶
func SelectorCuratedDatasetVersionSpec(createdBy string) (DatasetVersionSpec, error)
SelectorCuratedDatasetVersionSpec freezes the current built-in selector manifest into a versioned DatasetVersionSpec.
type DesktopChatSuccessRateAssets ¶
// DesktopChatSuccessRateAssets is the result type of
// Controller.EnsureDesktopChatSuccessRateAssets: a dataset, a dataset version
// and an eval spec. All three are pointers with omitempty, so a nil member is
// left out of the JSON output.
type DesktopChatSuccessRateAssets struct {
Dataset *Dataset `json:"dataset,omitempty"`
DatasetVersion *DatasetVersion `json:"dataset_version,omitempty"`
EvalSpec *EvalSpec `json:"eval_spec,omitempty"`
}
type DesktopChatSuccessRateGateReport ¶
// DesktopChatSuccessRateGateReport is returned by
// EvaluateDesktopChatSuccessRateGate(summary, existingFocusedRegressionPassed).
// DesktopChatSuccessRateGateCheck is declared elsewhere in the package.
type DesktopChatSuccessRateGateReport struct {
CaseCount int `json:"case_count"`
PassedCount int `json:"passed_count"`
PassRate float64 `json:"pass_rate"`
UnsafeSendCount int `json:"unsafe_send_count"`
TypedBodyIntoSearchFieldCount int `json:"typed_body_into_search_field_count"`
// Shares its name with the evaluator's second parameter; presumably copied
// from it — TODO confirm.
ExistingFocusedRegressionPassed bool `json:"existing_focused_regression_passed"`
FailureLabelCounts map[string]int `json:"failure_label_counts,omitempty"`
Checks []DesktopChatSuccessRateGateCheck `json:"checks,omitempty"`
// Passed is always emitted, including when false.
Passed bool `json:"passed"`
}
func EvaluateDesktopChatSuccessRateGate ¶
func EvaluateDesktopChatSuccessRateGate(summary map[string]interface{}, existingFocusedRegressionPassed bool) DesktopChatSuccessRateGateReport
type EvalRun ¶
// EvalRun is the record returned by Controller.SubmitEvalRun and
// Controller.ListEvalRuns. It references an eval spec and a run group by ID.
type EvalRun struct {
ID string `json:"id"`
EvalSpecID string `json:"eval_spec_id"`
GroupID string `json:"group_id"`
DatasetVersionID string `json:"dataset_version_id,omitempty"`
BaselineEvalRunID string `json:"baseline_eval_run_id,omitempty"`
Title string `json:"title,omitempty"`
OwnerUserID string `json:"owner_user_id,omitempty"`
// Status uses the run-group status type (RunGroupStatus).
Status RunGroupStatus `json:"status"`
TriggerKind string `json:"trigger_kind,omitempty"`
TriggerRef string `json:"trigger_ref,omitempty"`
Metadata map[string]interface{} `json:"metadata,omitempty"`
Summary map[string]interface{} `json:"summary,omitempty"`
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
// StartedAt and FinishedAt are pointers with omitempty: nil is omitted
// from the JSON output.
StartedAt *time.Time `json:"started_at,omitempty"`
FinishedAt *time.Time `json:"finished_at,omitempty"`
}
type EvalRunFilter ¶
// EvalRunFilter is the filter argument of Controller.ListEvalRuns. It has no
// JSON tags.
// NOTE(review): presumably a zero-valued field means "no restriction" on that
// dimension — confirm against the store query.
type EvalRunFilter struct {
OwnerUserID string
EvalSpecID string
Statuses []RunGroupStatus
Limit int
}
type EvalRunReport ¶
// EvalRunReport is returned by Controller.GetEvalRunReport. It bundles an eval
// run with its spec, dataset, dataset version and group report.
type EvalRunReport struct {
// EvalRun has no omitempty: a nil pointer encodes as JSON null. The other
// members are omitted when nil.
EvalRun *EvalRun `json:"eval_run"`
EvalSpec *EvalSpec `json:"eval_spec,omitempty"`
Dataset *Dataset `json:"dataset,omitempty"`
DatasetVersion *DatasetVersion `json:"dataset_version,omitempty"`
GroupReport *RunGroupReport `json:"group_report,omitempty"`
}
type EvalRunSpec ¶
// EvalRunSpec is the input accepted by Controller.SubmitEvalRun.
type EvalRunSpec struct {
// EvalSpecID is the only key without omitempty.
EvalSpecID string `json:"eval_spec_id"`
BaselineEvalRunID string `json:"baseline_eval_run_id,omitempty"`
Title string `json:"title,omitempty"`
OwnerUserID string `json:"owner_user_id,omitempty"`
TriggerKind string `json:"trigger_kind,omitempty"`
TriggerRef string `json:"trigger_ref,omitempty"`
Metadata map[string]interface{} `json:"metadata,omitempty"`
}
type EvalSpec ¶
// EvalSpec is the record returned by Controller.CreateEvalSpec and
// Controller.ListEvalSpecs.
type EvalSpec struct {
ID string `json:"id"`
Name string `json:"name"`
OwnerUserID string `json:"owner_user_id,omitempty"`
Subject string `json:"subject,omitempty"`
RunKind RunKind `json:"run_kind"`
Profile string `json:"profile,omitempty"`
DatasetID string `json:"dataset_id,omitempty"`
DatasetVersionID string `json:"dataset_version_id,omitempty"`
// NOTE(review): the JSON keys here are "scheduler_config" and
// "scoring_config", while the same Go fields on EvalSpecSpec use
// "scheduler" and "scoring". A payload shaped like one does not
// round-trip into the other for these two fields. Also, omitempty has no
// effect on the struct-typed ScoringConfig under encoding/json.
SchedulerConfig GroupSchedulerConfig `json:"scheduler_config,omitempty"`
ScoringConfig GroupScoringConfig `json:"scoring_config,omitempty"`
RuntimePolicy map[string]interface{} `json:"runtime_policy,omitempty"`
Metadata map[string]interface{} `json:"metadata,omitempty"`
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
}
type EvalSpecFilter ¶
type EvalSpecSpec ¶
// EvalSpecSpec is the input accepted by Controller.CreateEvalSpec and the
// value returned by the built-in *EvalSpecSpec(datasetID, datasetVersionID,
// ownerUserID) constructors.
type EvalSpecSpec struct {
Name string `json:"name"`
OwnerUserID string `json:"owner_user_id,omitempty"`
Subject string `json:"subject,omitempty"`
// RunKind is optional here but always emitted on EvalSpec; DatasetID is the
// reverse (always emitted here, omitempty on EvalSpec).
RunKind RunKind `json:"run_kind,omitempty"`
Profile string `json:"profile,omitempty"`
DatasetID string `json:"dataset_id"`
DatasetVersionID string `json:"dataset_version_id,omitempty"`
// NOTE(review): keyed "scheduler" / "scoring" here, but "scheduler_config"
// / "scoring_config" on EvalSpec.
SchedulerConfig GroupSchedulerConfig `json:"scheduler"`
ScoringConfig GroupScoringConfig `json:"scoring"`
RuntimePolicy map[string]interface{} `json:"runtime_policy,omitempty"`
Metadata map[string]interface{} `json:"metadata,omitempty"`
}
func Batch1ExecutionEvalSpecSpec ¶
func Batch1ExecutionEvalSpecSpec(datasetID, datasetVersionID, ownerUserID string) EvalSpecSpec
func CUAOSWorldMacBenchmarkEvalSpecSpec ¶
func CUAOSWorldMacBenchmarkEvalSpecSpec(datasetID, datasetVersionID, ownerUserID string) EvalSpecSpec
func DesktopChatSuccessRateEvalSpecSpec ¶
func DesktopChatSuccessRateEvalSpecSpec(datasetID, datasetVersionID, ownerUserID string) EvalSpecSpec
func SelectorCuratedEvalSpecSpec ¶
func SelectorCuratedEvalSpecSpec(datasetID, datasetVersionID, ownerUserID string) EvalSpecSpec
SelectorCuratedEvalSpecSpec provides a reusable EvalSpec template wired to the selector dry-run contract. The concrete dataset/version ids must be supplied by the caller.
type EvolutionOverview ¶
// EvolutionOverview is returned by Controller.BuildEvolutionOverview(ctx,
// skillID, ownerUserID). It groups revision and instruction counters.
type EvolutionOverview struct {
SkillID string `json:"skill_id,omitempty"`
// Both counter structs are held by value without omitempty and are always
// emitted.
Revisions EvolutionOverviewRevisionCounts `json:"revisions"`
Instructions EvolutionOverviewInstructionCounts `json:"instructions"`
}
type EvolutionOverviewInstructionCounts ¶
// EvolutionOverviewInstructionCounts is the type of
// EvolutionOverview.Instructions. Its single counter is always emitted,
// including when 0.
type EvolutionOverviewInstructionCounts struct {
Pending int `json:"pending"`
}
type EvolutionOverviewRevisionCounts ¶
// EvolutionOverviewRevisionCounts is the type of EvolutionOverview.Revisions.
// Its single counter is always emitted, including when 0.
type EvolutionOverviewRevisionCounts struct {
Accepted int `json:"accepted"`
}
type ExecPathGuard ¶
// ExecPathGuard exposes no exported fields. It is built from a *Controller
// with NewExecPathGuard and offers CheckExecPath and CheckExecWorkdir, each
// taking an absolute path and returning an error.
type ExecPathGuard struct {
// contains filtered or unexported fields
}
func NewExecPathGuard ¶
func NewExecPathGuard(manager *Controller) *ExecPathGuard
func (*ExecPathGuard) CheckExecPath ¶
func (g *ExecPathGuard) CheckExecPath(ctx context.Context, absPath string) error
func (*ExecPathGuard) CheckExecWorkdir ¶
func (g *ExecPathGuard) CheckExecWorkdir(ctx context.Context, absWorkdir string) error
type ExecutionEquivalenceMetrics ¶
// ExecutionEquivalenceMetrics is the type of
// ExecutionEquivalenceReport.Metrics. All scalar counters, rates and deltas
// are emitted even when zero; only the two breakdown maps use omitempty.
type ExecutionEquivalenceMetrics struct {
CaseCount int `json:"case_count"`
PassedCount int `json:"passed_count"`
PassRate float64 `json:"pass_rate"`
CriticalCaseCount int `json:"critical_case_count"`
CriticalPassedCount int `json:"critical_passed_count"`
CriticalPassRate float64 `json:"critical_pass_rate"`
InfraBlockedCount int `json:"infra_blocked_count"`
BaseInfraBlockedCount int `json:"base_infra_blocked_count"`
TargetInfraBlockedCount int `json:"target_infra_blocked_count"`
InfraBlockedDelta int `json:"infra_blocked_delta"`
// Each rate below comes as a Base / Target / Delta triple.
// NOTE(review): the sign convention of the deltas (target minus base, or
// the reverse) is not visible here — confirm in the evaluator.
BasePassRate float64 `json:"base_pass_rate"`
TargetPassRate float64 `json:"target_pass_rate"`
PassRateDelta float64 `json:"pass_rate_delta"`
BaseVerificationPassRate float64 `json:"base_verification_pass_rate"`
TargetVerificationPassRate float64 `json:"target_verification_pass_rate"`
VerificationPassRateDelta float64 `json:"verification_pass_rate_delta"`
BaseEvidenceBackedPassRate float64 `json:"base_evidence_backed_pass_rate"`
TargetEvidenceBackedPassRate float64 `json:"target_evidence_backed_pass_rate"`
EvidenceBackedPassRateDelta float64 `json:"evidence_backed_pass_rate_delta"`
RegressionCount int `json:"regression_count"`
ImprovementCount int `json:"improvement_count"`
NewFailureCount int `json:"new_failure_count"`
ResolvedFailureCount int `json:"resolved_failure_count"`
CriticalRegressionCount int `json:"critical_regression_count"`
// Per-segment metrics keyed by string; omitted from JSON when empty.
LocaleBreakdown map[string]ExecutionEquivalenceSegmentMetrics `json:"locale_breakdown,omitempty"`
PrimaryRouteBreakdown map[string]ExecutionEquivalenceSegmentMetrics `json:"primary_route_breakdown,omitempty"`
}
type ExecutionEquivalenceReport ¶
// ExecutionEquivalenceReport is returned by
// Controller.EvaluateExecutionEquivalence. ExecutionEquivalenceCheck is
// declared elsewhere in the package.
type ExecutionEquivalenceReport struct {
TargetEvalRunID string `json:"target_eval_run_id"`
BaseEvalRunID string `json:"base_eval_run_id,omitempty"`
BaselineID string `json:"baseline_id,omitempty"`
ComparisonReportID string `json:"comparison_report_id,omitempty"`
Metrics ExecutionEquivalenceMetrics `json:"metrics"`
// Thresholds is an untyped map here, whereas the request carries the typed
// ExecutionEquivalenceThresholds struct.
Thresholds map[string]interface{} `json:"thresholds,omitempty"`
Checks []ExecutionEquivalenceCheck `json:"checks,omitempty"`
// Passed is always emitted, including when false.
Passed bool `json:"passed"`
CreatedAt time.Time `json:"created_at"`
}
type ExecutionEquivalenceRequest ¶
// ExecutionEquivalenceRequest is the request argument of
// Controller.EvaluateExecutionEquivalence. It can name a base eval run, a
// baseline, or neither; how the base is resolved is not visible here.
type ExecutionEquivalenceRequest struct {
BaseEvalRunID string `json:"base_eval_run_id,omitempty"`
BaselineID string `json:"baseline_id,omitempty"`
// NOTE(review): Thresholds is a struct value, so omitempty has no effect
// under encoding/json. An unset thresholds struct encodes as {} because all
// of its own fields are omitempty pointers.
Thresholds ExecutionEquivalenceThresholds `json:"thresholds,omitempty"`
}
type ExecutionEquivalenceSegmentMetrics ¶
// ExecutionEquivalenceSegmentMetrics is the value type of the LocaleBreakdown
// and PrimaryRouteBreakdown maps on ExecutionEquivalenceMetrics. Its fields
// are a subset of that struct's counters and rates, and all of them are
// emitted even when zero.
type ExecutionEquivalenceSegmentMetrics struct {
CaseCount int `json:"case_count"`
PassedCount int `json:"passed_count"`
PassRate float64 `json:"pass_rate"`
CriticalCaseCount int `json:"critical_case_count"`
CriticalPassedCount int `json:"critical_passed_count"`
CriticalPassRate float64 `json:"critical_pass_rate"`
InfraBlockedCount int `json:"infra_blocked_count"`
RegressionCount int `json:"regression_count"`
ImprovementCount int `json:"improvement_count"`
NewFailureCount int `json:"new_failure_count"`
ResolvedFailureCount int `json:"resolved_failure_count"`
CriticalRegressionCount int `json:"critical_regression_count"`
}
type ExecutionEquivalenceThresholds ¶
// ExecutionEquivalenceThresholds is the type of
// ExecutionEquivalenceRequest.Thresholds. Every field is a pointer, so "not
// set" (nil, omitted from JSON) can be told apart from an explicit zero.
// DefaultExecutionEquivalenceThresholds returns a value of this type.
// NOTE(review): presumably nil fields fall back to those defaults — confirm.
type ExecutionEquivalenceThresholds struct {
MaxPassRateDrop *float64 `json:"max_pass_rate_drop,omitempty"`
MaxCriticalRegressionCount *int `json:"max_critical_regression_count,omitempty"`
MaxVerificationPassRateDrop *float64 `json:"max_verification_pass_rate_drop,omitempty"`
MaxEvidenceBackedPassRateDrop *float64 `json:"max_evidence_backed_pass_rate_drop,omitempty"`
}
func DefaultExecutionEquivalenceThresholds ¶
func DefaultExecutionEquivalenceThresholds() ExecutionEquivalenceThresholds
type ExecutionMiddleware ¶
// ExecutionMiddleware is the hook interface registered through
// Controller.UseExecutionMiddleware and listed by
// Controller.ExecutionMiddlewares.
type ExecutionMiddleware interface {
// BeforeStart is the only hook that returns an error.
// NOTE(review): presumably a non-nil error aborts the start — confirm.
BeforeStart(ctx context.Context, runCtx *RunContext) error
// AfterStart has no return value.
AfterStart(ctx context.Context, runCtx *RunContext)
// OnStartError receives the start error as runErr and has no return value.
OnStartError(ctx context.Context, runCtx *RunContext, runErr error)
}
ExecutionMiddleware provides run-scoped lifecycle hooks around driver start.
func NewSkillCandidateMiddleware ¶
func NewSkillCandidateMiddleware() ExecutionMiddleware
type ExecutionMiddlewareHooks ¶
// ExecutionMiddlewareHooks holds one function field per ExecutionMiddleware
// method, each with that method's signature. Its value-receiver methods
// BeforeStart, AfterStart and OnStartError give it the ExecutionMiddleware
// method set.
// NOTE(review): presumably a nil function field makes that hook a no-op —
// confirm in the method bodies.
type ExecutionMiddlewareHooks struct {
BeforeStartFunc func(ctx context.Context, runCtx *RunContext) error
AfterStartFunc func(ctx context.Context, runCtx *RunContext)
OnStartErrorFunc func(ctx context.Context, runCtx *RunContext, runErr error)
}
ExecutionMiddlewareHooks is a small adapter for wiring hook functions.
func (ExecutionMiddlewareHooks) AfterStart ¶
func (h ExecutionMiddlewareHooks) AfterStart(ctx context.Context, runCtx *RunContext)
func (ExecutionMiddlewareHooks) BeforeStart ¶
func (h ExecutionMiddlewareHooks) BeforeStart(ctx context.Context, runCtx *RunContext) error
func (ExecutionMiddlewareHooks) OnStartError ¶
func (h ExecutionMiddlewareHooks) OnStartError(ctx context.Context, runCtx *RunContext, runErr error)
type GroupDispatcher ¶
// GroupDispatcher exposes no exported fields. It is built from a *Controller
// with NewGroupDispatcher and offers DispatchOnce, Start, SetPollInterval and
// SetRunPollInterval.
type GroupDispatcher struct {
// contains filtered or unexported fields
}
func NewGroupDispatcher ¶
func NewGroupDispatcher(manager *Controller) *GroupDispatcher
func (*GroupDispatcher) DispatchOnce ¶
func (d *GroupDispatcher) DispatchOnce(ctx context.Context) error
func (*GroupDispatcher) SetPollInterval ¶
func (d *GroupDispatcher) SetPollInterval(interval time.Duration)
func (*GroupDispatcher) SetRunPollInterval ¶
func (d *GroupDispatcher) SetRunPollInterval(interval time.Duration)
func (*GroupDispatcher) Start ¶
func (d *GroupDispatcher) Start(ctx context.Context)
type GroupPromotionResult ¶
// GroupPromotionResult is returned by Controller.PromoteGroup: a dataset, a
// dataset version and an eval spec. All three are pointers with omitempty, so
// a nil member is left out of the JSON output.
type GroupPromotionResult struct {
Dataset *Dataset `json:"dataset,omitempty"`
DatasetVersion *DatasetVersion `json:"dataset_version,omitempty"`
EvalSpec *EvalSpec `json:"eval_spec,omitempty"`
}
type GroupPromotionSpec ¶
type GroupSchedulerConfig ¶
type GroupScoringConfig ¶
// GroupScoringConfig is the scoring configuration embedded by value in
// BatchTrajectoryRunnerConfig, DatasetManifestDefaults, EvalSpec and
// EvalSpecSpec. ScoringMode is declared elsewhere in the package.
type GroupScoringConfig struct {
Mode ScoringMode `json:"mode,omitempty"`
RuleProfile string `json:"rule_profile,omitempty"`
JudgeModel string `json:"judge_model,omitempty"`
// NOTE(review): with omitempty a PassThreshold of exactly 0 is dropped
// from the JSON output, so it cannot be told apart from "unset".
PassThreshold float64 `json:"pass_threshold,omitempty"`
}
type GuardPipelineError ¶
// GuardPipelineError is a structured error type. Its pointer receiver has
// Error and Unwrap methods, plus ToolRuntimeCode and ToolRuntimeDetails
// accessors.
type GuardPipelineError struct {
// Stage, Code and Message are always emitted in JSON; Details is omitted
// when empty.
Stage RuntimeStage `json:"stage"`
Code string `json:"code"`
Message string `json:"message"`
Details map[string]interface{} `json:"details,omitempty"`
// contains filtered or unexported fields
}
GuardPipelineError normalizes preflight failures before a run is dispatched.
func (*GuardPipelineError) Error ¶
func (e *GuardPipelineError) Error() string
func (*GuardPipelineError) ToolRuntimeCode ¶
func (e *GuardPipelineError) ToolRuntimeCode() string
func (*GuardPipelineError) ToolRuntimeDetails ¶
func (e *GuardPipelineError) ToolRuntimeDetails() map[string]interface{}
func (*GuardPipelineError) Unwrap ¶
func (e *GuardPipelineError) Unwrap() error
type Handler ¶
// Handler exposes no exported fields. It is built from a *Controller with
// NewHandler; its exported methods are listed after this declaration.
type Handler struct {
// contains filtered or unexported fields
}
func NewHandler ¶
func NewHandler(manager *Controller) *Handler
func (*Handler) EnsureBatch1ExecutionAssets ¶
func (*Handler) EnsureSelectorCuratedAssets ¶
func (*Handler) EvaluateExecutionEquivalence ¶
func (*Handler) EvaluateSkillCutoverBudgetGate ¶
func (*Handler) EvaluateSkillCutoverReadiness ¶
func (*Handler) GetSkillEvolutionCase ¶
func (*Handler) ImportDatasetBundleFromSource ¶
func (*Handler) ListSkillDecisionHistory ¶
func (*Handler) ListSkillEvolutionCases ¶
func (*Handler) PreviewDatasetBundleFromSource ¶
func (*Handler) RegisterRoutes ¶
func (*Handler) RollbackSkillRevision ¶
func (*Handler) SetDetailProvider ¶
func (h *Handler) SetDetailProvider(provider RunDetailProvider)
func (*Handler) SetEvolutionProposalSummaryProvider ¶
func (h *Handler) SetEvolutionProposalSummaryProvider(provider EvolutionProposalSummaryProvider)
type HarnessAPICheck ¶
type HarnessAdaptivePolicy ¶
type HarnessAdaptivePolicy struct {
Profile string `json:"profile,omitempty"`
Reason string `json:"reason,omitempty"`
EnableExternalQA bool `json:"enable_external_qa,omitempty"`
EnableBrowserQA bool `json:"enable_browser_qa,omitempty"`
EnableCheckpoints bool `json:"enable_checkpoints,omitempty"`
MaxRecoveryAttempts int `json:"max_recovery_attempts,omitempty"`
CheckpointInterval int `json:"checkpoint_interval,omitempty"`
}
func DeriveHarnessAdaptivePolicy ¶
func DeriveHarnessAdaptivePolicy(model string, kind RunKind, contract HarnessContract) HarnessAdaptivePolicy
type HarnessBrowserCheck ¶
type HarnessBrowserCheck struct {
Name string `json:"name,omitempty"`
Target string `json:"target,omitempty"`
Expectation string `json:"expectation,omitempty"`
RequiredObservation string `json:"required_observation,omitempty"`
RequiredArtifact string `json:"required_artifact,omitempty"`
FailureLabel string `json:"failure_label,omitempty"`
RequireScreenshot bool `json:"require_screenshot,omitempty"`
}
type HarnessCheckpoint ¶
type HarnessCheckpoint struct {
Version string `json:"version,omitempty"`
RunID string `json:"run_id,omitempty"`
GroupID string `json:"group_id,omitempty"`
GroupItemID string `json:"group_item_id,omitempty"`
AttemptIndex int `json:"attempt_index,omitempty"`
Goal string `json:"goal,omitempty"`
Summary string `json:"summary,omitempty"`
VerifiedEvidence []string `json:"verified_evidence,omitempty"`
UnresolvedRisks []string `json:"unresolved_risks,omitempty"`
FailureLabels []string `json:"failure_labels,omitempty"`
NextContract HarnessContract `json:"next_contract,omitempty"`
EvaluatorInput string `json:"evaluator_input,omitempty"`
RecommendedResume string `json:"recommended_resume,omitempty"`
CreatedAt time.Time `json:"created_at"`
}
type HarnessContract ¶
type HarnessContract struct {
Deliverables []string `json:"deliverables,omitempty"`
SuccessCriteria []string `json:"success_criteria,omitempty"`
ExpectedArtifacts []HarnessExpectedArtifact `json:"expected_artifacts,omitempty"`
RequiredToolCalls []string `json:"required_tool_calls,omitempty"`
ForbiddenToolCalls []string `json:"forbidden_tool_calls,omitempty"`
RequiredChecks []string `json:"required_checks,omitempty"`
RequiredObservations []string `json:"required_observations,omitempty"`
ForbiddenObservations []string `json:"forbidden_observations,omitempty"`
BrowserChecks []HarnessBrowserCheck `json:"browser_checks,omitempty"`
APIChecks []HarnessAPICheck `json:"api_checks,omitempty"`
FallbackOrder []string `json:"fallback_order,omitempty"`
StopConditions []string `json:"stop_conditions,omitempty"`
EvaluatorHints []string `json:"evaluator_hints,omitempty"`
RiskLevel string `json:"risk_level,omitempty"`
}
func DecodeHarnessContract ¶
func DecodeHarnessContract(sources ...map[string]interface{}) HarnessContract
type HarnessExpectedArtifact ¶
type HarnessSubagentExecutor ¶
type HarnessSubagentExecutor struct {
// contains filtered or unexported fields
}
func NewSubagentExecutor ¶
func NewSubagentExecutor(manager *Controller, agents *config.AgentsConfig) *HarnessSubagentExecutor
func (*HarnessSubagentExecutor) ExecuteIsolated ¶
func (e *HarnessSubagentExecutor) ExecuteIsolated(ctx context.Context, req tools.SubagentRequest) (*tools.SubagentResult, error)
func (*HarnessSubagentExecutor) ExecuteSubagent ¶
func (e *HarnessSubagentExecutor) ExecuteSubagent(ctx context.Context, req tools.SubagentRequest) (*tools.SubagentResult, error)
type HarnessVerificationResult ¶
type HarnessVerificationResult struct {
Passed bool `json:"passed"`
Retryable bool `json:"retryable"`
FailureLabel string `json:"failure_label,omitempty"`
Summary string `json:"summary,omitempty"`
OutcomeScore float64 `json:"outcome_score,omitempty"`
EvidenceScore float64 `json:"evidence_score,omitempty"`
ExecutionScore float64 `json:"execution_score,omitempty"`
Observations []string `json:"observations,omitempty"`
Checks []map[string]interface{} `json:"checks,omitempty"`
Artifacts []map[string]interface{} `json:"artifacts,omitempty"`
TraceSummary map[string]interface{} `json:"trace_summary,omitempty"`
}
type ImportDatasetBundleEvalSpec ¶
type ImportDatasetBundleEvalSpec struct {
Name string `json:"name"`
OwnerUserID string `json:"owner_user_id,omitempty"`
Subject string `json:"subject,omitempty"`
RunKind RunKind `json:"run_kind,omitempty"`
Profile string `json:"profile,omitempty"`
SchedulerConfig GroupSchedulerConfig `json:"scheduler,omitempty"`
ScoringConfig GroupScoringConfig `json:"scoring,omitempty"`
RuntimePolicy map[string]interface{} `json:"runtime_policy,omitempty"`
Metadata map[string]interface{} `json:"metadata,omitempty"`
}
type ImportDatasetBundleFromSourceRequest ¶
type ImportDatasetBundleFromSourceRequest struct {
SourceType string `json:"source_type,omitempty"`
Path string `json:"path,omitempty"`
Source string `json:"source,omitempty"`
BundlePath string `json:"bundle_path,omitempty"`
Version string `json:"version,omitempty"`
MakeActive *bool `json:"make_active,omitempty"`
}
type ImportDatasetBundleRequest ¶
type ImportDatasetBundleRequest struct {
SourceType string `json:"source_type,omitempty"`
SourceRef string `json:"source_ref,omitempty"`
Dataset DatasetSpec `json:"dataset"`
Version DatasetVersionSpec `json:"version"`
EvalSpecs []ImportDatasetBundleEvalSpec `json:"eval_specs,omitempty"`
MakeActive bool `json:"make_active,omitempty"`
}
type ImportDatasetBundleResult ¶
type ImportDatasetBundleResult struct {
Dataset *Dataset `json:"dataset,omitempty"`
DatasetVersion *DatasetVersion `json:"dataset_version,omitempty"`
EvalSpecs []EvalSpec `json:"eval_specs,omitempty"`
}
type JudgeEvaluationRequest ¶
type JudgeEvaluationRequest struct {
Model string
Group *RunGroup
Item *RunGroupItem
Run *Run
Calibration map[string]interface{}
Verification *HarnessVerificationResult
}
type JudgeEvaluationResult ¶
type JudgeEvaluator ¶
type JudgeEvaluator interface {
Evaluate(ctx context.Context, req JudgeEvaluationRequest) (*JudgeEvaluationResult, error)
}
type LLMJudgeEvaluator ¶
type LLMJudgeEvaluator struct {
// contains filtered or unexported fields
}
func NewLLMJudgeEvaluator ¶
func NewLLMJudgeEvaluator(llmCaller judgeLLMCaller) *LLMJudgeEvaluator
func (*LLMJudgeEvaluator) Evaluate ¶
func (e *LLMJudgeEvaluator) Evaluate(ctx context.Context, req JudgeEvaluationRequest) (*JudgeEvaluationResult, error)
type LegacyStoreMigrationResult ¶
LegacyStoreMigrationResult describes a one-time import from the legacy standalone harness.db file into the shared blue.db store.
func MigrateLegacyStore ¶
func MigrateLegacyStore(ctx context.Context, db *sql.DB, dataDir string) (*LegacyStoreMigrationResult, error)
MigrateLegacyStore imports rows from the legacy harness.db file into the shared SQLite database and archives the standalone file afterward.
type Manager ¶
type Manager interface {
Submit(ctx context.Context, spec RunSpec) (*Run, error)
Get(ctx context.Context, id string) (*Run, error)
List(ctx context.Context, filter RunFilter) ([]Run, error)
Cancel(ctx context.Context, id string, reason string) error
PerformAction(ctx context.Context, id string, action string, input map[string]interface{}) (*Run, error)
SpawnChild(ctx context.Context, parentID string, spec RunSpec) (*Run, error)
AppendEvent(ctx context.Context, event RunEvent) error
AttachArtifact(ctx context.Context, ref ArtifactRef) error
}
type OptimizationReason ¶
type OptimizationReason string
const (
OptimizationReasonRuntimeSkillFailure OptimizationReason = "runtime_skill_failure"
OptimizationReasonRuntimeSkillCapture OptimizationReason = "runtime_skill_capture"
OptimizationReasonSelectorGateFailed OptimizationReason = "selector_gate_failed"
OptimizationReasonExecutionGateFailed OptimizationReason = "execution_gate_failed"
OptimizationReasonBudgetGateFailed OptimizationReason = "budget_gate_failed"
OptimizationReasonCutoverBlocking OptimizationReason = "cutover_readiness_blocking"
OptimizationReasonManualSkillOptimize OptimizationReason = "manual_skill_optimize"
OptimizationReasonSelectorGatePassed OptimizationReason = "selector_gate_passed"
OptimizationReasonExecutionGatePassed OptimizationReason = "execution_gate_passed"
OptimizationReasonBudgetGatePassed OptimizationReason = "budget_gate_passed"
)
type OptimizationSurface ¶
type OptimizationSurface string
const (
OptimizationSurfaceConstraints OptimizationSurface = "constraints"
OptimizationSurfaceRunnerCode OptimizationSurface = "runner_code"
OptimizationSurfaceBuildRecipe OptimizationSurface = "build_recipe"
OptimizationSurfaceSkillDefinition OptimizationSurface = "skill_definition"
)
type OptimizationTrigger ¶
type OptimizationTrigger struct {
Reason OptimizationReason `json:"reason"`
CandidateID string `json:"candidate_id,omitempty"`
EvalRunID string `json:"eval_run_id,omitempty"`
BaseEvalRunID string `json:"base_eval_run_id,omitempty"`
OptimizationRun bool `json:"optimization_run,omitempty"`
OptimizationSurface OptimizationSurface `json:"optimization_surface,omitempty"`
Metadata map[string]interface{} `json:"metadata,omitempty"`
}
type OptimizationTriggerer ¶
type OptimizationTriggerer interface {
TriggerOptimization(ctx context.Context, event OptimizationTrigger) error
}
type PolicyResolver ¶
type PolicyResolver struct {
// contains filtered or unexported fields
}
func NewPolicyResolver ¶
func NewPolicyResolver(cfg config.HarnessConfig, agents *config.AgentsConfig) *PolicyResolver
func (*PolicyResolver) ArtifactRoot ¶
func (r *PolicyResolver) ArtifactRoot(runID string) string
func (*PolicyResolver) IsProtectedPath ¶
func (r *PolicyResolver) IsProtectedPath(path string, artifactRoot string) bool
func (*PolicyResolver) Resolve ¶
func (r *PolicyResolver) Resolve(spec RunSpec) RunSpec
func (*PolicyResolver) ResolveChild ¶
func (r *PolicyResolver) ResolveChild(parent *Run, spec RunSpec) (RunSpec, error)
type ProposalReflector ¶
type ProposalReflector interface {
Reflect(ctx context.Context, input selfreflect.Input) (*selfreflect.Result, error)
}
type Run ¶
type Run struct {
ID string `json:"id"`
RootRunID string `json:"root_run_id"`
ParentRunID string `json:"parent_run_id,omitempty"`
GroupID string `json:"group_id,omitempty"`
GroupItemID string `json:"group_item_id,omitempty"`
AttemptIndex int `json:"attempt_index,omitempty"`
Kind RunKind `json:"kind"`
Status RunStatus `json:"status"`
RuntimeState agentpkg.RuntimeState `json:"runtime_state,omitempty"`
UserID string `json:"user_id,omitempty"`
ConversationID string `json:"conversation_id,omitempty"`
SessionID string `json:"session_id,omitempty"`
AgentID string `json:"agent_id,omitempty"`
Goal string `json:"goal"`
ProviderID string `json:"provider_id,omitempty"`
Model string `json:"model,omitempty"`
Result string `json:"result,omitempty"`
Error string `json:"error,omitempty"`
Depth int `json:"depth"`
CurrentStep int `json:"current_step"`
Progress int `json:"progress"`
WorkspaceRoot string `json:"workspace_root,omitempty"`
ArtifactRoot string `json:"artifact_root,omitempty"`
SandboxMode string `json:"sandbox_mode,omitempty"`
ApprovalMode ApprovalMode `json:"approval_mode,omitempty"`
MaxDuration time.Duration `json:"max_duration,omitempty"`
MaxSteps int `json:"max_steps,omitempty"`
MaxToolRounds int `json:"max_tool_rounds,omitempty"`
MaxSubagents int `json:"max_subagents,omitempty"`
MaxDepth int `json:"max_depth,omitempty"`
Metadata map[string]interface{} `json:"metadata,omitempty"`
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
StartedAt *time.Time `json:"started_at,omitempty"`
FinishedAt *time.Time `json:"finished_at,omitempty"`
}
type RunActionAvailability ¶
type RunActionAvailability struct {
Items []ActionDescriptor `json:"items,omitempty"`
}
type RunContext ¶
type RunContext struct {
Run *Run
Parent *Run
Driver Driver
Controller *Controller
Values map[string]interface{}
}
RunContext captures the run-scoped execution envelope shared by controller, middlewares, and drivers during dispatch.
func GetRunContext ¶
func GetRunContext(ctx context.Context) *RunContext
GetRunContext returns the run-scoped execution envelope carried on the context.
type RunDetail ¶
type RunDetail struct {
Run *Run `json:"run"`
Actions RunActionAvailability `json:"actions"`
Events []RunEvent `json:"events"`
Artifacts []ArtifactRef `json:"artifacts"`
RunTrace *RunTrace `json:"run_trace,omitempty"`
PendingApprovals []map[string]interface{} `json:"pending_approvals,omitempty"`
PendingQuestions []map[string]interface{} `json:"pending_questions,omitempty"`
}
type RunDetailProvider ¶
type RunEnv ¶
type RunEnv struct {
Manager *Controller
RunContext *RunContext
}
type RunEvent ¶
type RunEvent struct {
ID string `json:"id"`
RunID string `json:"run_id"`
RootRunID string `json:"root_run_id,omitempty"`
ParentRunID string `json:"parent_run_id,omitempty"`
Type string `json:"type"`
StepIndex int `json:"step_index,omitempty"`
ToolName string `json:"tool_name,omitempty"`
CapabilityKind string `json:"capability_kind,omitempty"`
Message string `json:"message,omitempty"`
PayloadJSON string `json:"payload_json,omitempty"`
CreatedAt time.Time `json:"created_at"`
}
type RunGroup ¶
type RunGroup struct {
ID string `json:"id"`
Kind RunGroupKind `json:"kind"`
Title string `json:"title,omitempty"`
Status RunGroupStatus `json:"status"`
OwnerUserID string `json:"owner_user_id,omitempty"`
Subject string `json:"subject,omitempty"`
SchedulerConfig GroupSchedulerConfig `json:"scheduler_config,omitempty"`
ScoringConfig GroupScoringConfig `json:"scoring_config,omitempty"`
Metadata map[string]interface{} `json:"metadata,omitempty"`
Summary map[string]interface{} `json:"summary,omitempty"`
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
StartedAt *time.Time `json:"started_at,omitempty"`
FinishedAt *time.Time `json:"finished_at,omitempty"`
}
type RunGroupFilter ¶
type RunGroupFilter struct {
OwnerUserID string
Kinds []RunGroupKind
Statuses []RunGroupStatus
Limit int
}
type RunGroupItem ¶
type RunGroupItem struct {
ID string `json:"id"`
GroupID string `json:"group_id"`
Index int `json:"index"`
RunKind RunKind `json:"run_kind"`
Profile string `json:"profile,omitempty"`
Input map[string]interface{} `json:"input,omitempty"`
Expected map[string]interface{} `json:"expected,omitempty"`
Metadata map[string]interface{} `json:"metadata,omitempty"`
Status RunGroupItemStatus `json:"status"`
LatestRunID string `json:"latest_run_id,omitempty"`
AttemptCount int `json:"attempt_count,omitempty"`
MaxAttempts int `json:"max_attempts,omitempty"`
LeaseOwner string `json:"lease_owner,omitempty"`
LeaseExpiresAt *time.Time `json:"lease_expires_at,omitempty"`
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
}
type RunGroupItemSpec ¶
type RunGroupItemStatus ¶
type RunGroupItemStatus string
const (
RunGroupItemStatusPending RunGroupItemStatus = "pending"
RunGroupItemStatusQueued RunGroupItemStatus = "queued"
RunGroupItemStatusRunning RunGroupItemStatus = "running"
RunGroupItemStatusScoring RunGroupItemStatus = "scoring"
RunGroupItemStatusPassed RunGroupItemStatus = "passed"
RunGroupItemStatusFailed RunGroupItemStatus = "failed"
RunGroupItemStatusError RunGroupItemStatus = "error"
RunGroupItemStatusCancelled RunGroupItemStatus = "cancelled"
)
type RunGroupKind ¶
type RunGroupKind string
const (
RunGroupKindEval RunGroupKind = "eval"
RunGroupKindExperiment RunGroupKind = "experiment"
RunGroupKindBatch RunGroupKind = "batch"
)
type RunGroupReport ¶
type RunGroupReport struct {
Group *RunGroup `json:"group"`
Items []RunGroupItem `json:"items,omitempty"`
VerdictCounts map[string]int `json:"verdict_counts,omitempty"`
OverallScore float64 `json:"overall_score,omitempty"`
PassRate float64 `json:"pass_rate,omitempty"`
Breakdown map[string]interface{} `json:"breakdown,omitempty"`
FailedItems []map[string]interface{} `json:"failed_items,omitempty"`
LinkedRuns []Run `json:"linked_runs,omitempty"`
Artifacts []ArtifactRef `json:"artifacts,omitempty"`
Scorecards []Scorecard `json:"scorecards,omitempty"`
RuntimeEvidence map[string][]RuntimeEvidenceEntry `json:"runtime_evidence,omitempty"`
RuntimeTraces map[string]RunTrace `json:"runtime_traces,omitempty"`
ItemContracts map[string]HarnessContract `json:"item_contracts,omitempty"`
Checkpoints []CheckpointArtifact `json:"checkpoints,omitempty"`
}
type RunGroupSpec ¶
type RunGroupSpec struct {
Kind RunGroupKind `json:"kind"`
Title string `json:"title,omitempty"`
Subject string `json:"subject,omitempty"`
OwnerUserID string `json:"owner_user_id,omitempty"`
Metadata map[string]interface{} `json:"metadata,omitempty"`
SchedulerConfig GroupSchedulerConfig `json:"scheduler"`
ScoringConfig GroupScoringConfig `json:"scoring"`
Items []RunGroupItemSpec `json:"items"`
}
type RunGroupStatus ¶
type RunGroupStatus string
const (
RunGroupStatusPending RunGroupStatus = "pending"
RunGroupStatusQueued RunGroupStatus = "queued"
RunGroupStatusRunning RunGroupStatus = "running"
RunGroupStatusScoring RunGroupStatus = "scoring"
RunGroupStatusCompleted RunGroupStatus = "completed"
RunGroupStatusPartial RunGroupStatus = "partial"
RunGroupStatusFailed RunGroupStatus = "failed"
RunGroupStatusCancelled RunGroupStatus = "cancelled"
)
type RunSpec ¶
type RunSpec struct {
Kind RunKind `json:"kind"`
Goal string `json:"goal"`
UserID string `json:"user_id,omitempty"`
ConversationID string `json:"conversation_id,omitempty"`
SessionID string `json:"session_id,omitempty"`
ParentRunID string `json:"parent_run_id,omitempty"`
GroupID string `json:"group_id,omitempty"`
GroupItemID string `json:"group_item_id,omitempty"`
AttemptIndex int `json:"attempt_index,omitempty"`
AgentID string `json:"agent_id,omitempty"`
ProviderID string `json:"provider_id,omitempty"`
Model string `json:"model,omitempty"`
WorkspaceRoot string `json:"workspace_root,omitempty"`
SandboxMode string `json:"sandbox_mode,omitempty"`
ApprovalMode ApprovalMode `json:"approval_mode,omitempty"`
MaxDuration time.Duration `json:"max_duration,omitempty"`
MaxSteps int `json:"max_steps,omitempty"`
MaxToolRounds int `json:"max_tool_rounds,omitempty"`
MaxSubagents int `json:"max_subagents,omitempty"`
MaxDepth int `json:"max_depth,omitempty"`
Metadata map[string]interface{} `json:"metadata,omitempty"`
}
type RunStatus ¶
type RunStatus string
const (
RunStatusPending RunStatus = "pending"
RunStatusPlanning RunStatus = "planning"
RunStatusWaitingInput RunStatus = "waiting_input"
RunStatusExecuting RunStatus = "executing"
RunStatusVerifying RunStatus = "verifying"
RunStatusCompleted RunStatus = "completed"
RunStatusFailed RunStatus = "failed"
RunStatusCancelled RunStatus = "cancelled"
RunStatusAborted RunStatus = "aborted"
)
type RunTrace ¶
type RunTrace struct {
RunID string `json:"run_id"`
RootRunID string `json:"root_run_id,omitempty"`
ParentRunID string `json:"parent_run_id,omitempty"`
Kind RunKind `json:"kind,omitempty"`
Status RunStatus `json:"status,omitempty"`
StartedAt *time.Time `json:"started_at,omitempty"`
FinishedAt *time.Time `json:"finished_at,omitempty"`
LatencyMs int64 `json:"latency_ms,omitempty"`
Stages []RunTraceStage `json:"stages,omitempty"`
Events []RunTraceEvent `json:"events,omitempty"`
Artifacts []ArtifactRef `json:"artifacts,omitempty"`
}
type RunTraceCollector ¶
type RunTraceCollector struct {
// contains filtered or unexported fields
}
func NewRunTraceCollector ¶
func NewRunTraceCollector(manager *Controller) *RunTraceCollector
func (*RunTraceCollector) Middleware ¶
func (c *RunTraceCollector) Middleware() ExecutionMiddleware
type RunTraceEvent ¶
type RunTraceProvider ¶
type RunTraceStage ¶
type RuntimeEvidenceEntry ¶
type RuntimeEvidenceEntry struct {
ID string `json:"id"`
RunID string `json:"run_id"`
StepIndex int `json:"step_index,omitempty"`
PlannerRound int `json:"planner_round,omitempty"`
EventType string `json:"event_type"`
Summary string `json:"summary,omitempty"`
PayloadJSON string `json:"payload_json,omitempty"`
CreatedAt time.Time `json:"created_at"`
}
type RuntimeEvidenceProvider ¶
type RuntimeEvidenceProvider interface {
ListRuntimeEvidence(ctx context.Context, run *Run) ([]RuntimeEvidenceEntry, error)
}
type RuntimeObserver ¶
type RuntimeObserver struct {
// contains filtered or unexported fields
}
func NewRuntimeObserver ¶
func NewRuntimeObserver(manager *Controller) *RuntimeObserver
func (*RuntimeObserver) OnApprovalRequested ¶
func (o *RuntimeObserver) OnApprovalRequested(event tools.ApprovalRuntimeEvent)
func (*RuntimeObserver) OnApprovalResolved ¶
func (o *RuntimeObserver) OnApprovalResolved(event tools.ApprovalRuntimeEvent)
func (*RuntimeObserver) OnQuestionRequested ¶
func (o *RuntimeObserver) OnQuestionRequested(event tools.QuestionRuntimeEvent)
func (*RuntimeObserver) OnQuestionResolved ¶
func (o *RuntimeObserver) OnQuestionResolved(event tools.QuestionRuntimeEvent)
func (*RuntimeObserver) OnToolFinished ¶
func (o *RuntimeObserver) OnToolFinished(event tools.ToolRuntimeEvent)
func (*RuntimeObserver) OnToolRequested ¶
func (o *RuntimeObserver) OnToolRequested(event tools.ToolRuntimeEvent)
type RuntimeReflectionCoordinator ¶
type RuntimeReflectionCoordinator struct {
// contains filtered or unexported fields
}
func NewRuntimeReflectionCoordinator ¶
func NewRuntimeReflectionCoordinator(controller *Controller, cfg config.RuntimeReflectionConfig) *RuntimeReflectionCoordinator
func (*RuntimeReflectionCoordinator) OnApprovalRequested ¶
func (c *RuntimeReflectionCoordinator) OnApprovalRequested(event tools.ApprovalRuntimeEvent)
func (*RuntimeReflectionCoordinator) OnApprovalResolved ¶
func (c *RuntimeReflectionCoordinator) OnApprovalResolved(event tools.ApprovalRuntimeEvent)
func (*RuntimeReflectionCoordinator) OnQuestionRequested ¶
func (c *RuntimeReflectionCoordinator) OnQuestionRequested(event tools.QuestionRuntimeEvent)
func (*RuntimeReflectionCoordinator) OnQuestionResolved ¶
func (c *RuntimeReflectionCoordinator) OnQuestionResolved(event tools.QuestionRuntimeEvent)
func (*RuntimeReflectionCoordinator) OnRunTerminal ¶
func (c *RuntimeReflectionCoordinator) OnRunTerminal(ctx context.Context, run *Run)
func (*RuntimeReflectionCoordinator) OnToolFinished ¶
func (c *RuntimeReflectionCoordinator) OnToolFinished(event tools.ToolRuntimeEvent)
func (*RuntimeReflectionCoordinator) OnToolRequested ¶
func (c *RuntimeReflectionCoordinator) OnToolRequested(event tools.ToolRuntimeEvent)
type RuntimeStage ¶
type RuntimeStage string
const (
RuntimeStageNormalize RuntimeStage = "normalize"
RuntimeStagePolicy RuntimeStage = "policy"
RuntimeStageApproval RuntimeStage = "approval"
RuntimeStageExecute RuntimeStage = "execute"
RuntimeStageFinalize RuntimeStage = "finalize"
)
type SQLiteStore ¶
type SQLiteStore struct {
// contains filtered or unexported fields
}
func NewSQLiteStore ¶
func NewSQLiteStore(db *sql.DB) (*SQLiteStore, error)
func NewSQLiteStoreWithReadDB ¶
func NewSQLiteStoreWithReadDB(writeDB, readDB *sql.DB) (*SQLiteStore, error)
func (*SQLiteStore) AppendEvent ¶
func (s *SQLiteStore) AppendEvent(ctx context.Context, event RunEvent) error
func (*SQLiteStore) AttachArtifact ¶
func (s *SQLiteStore) AttachArtifact(ctx context.Context, ref ArtifactRef) error
func (*SQLiteStore) AttachScorecard ¶
func (s *SQLiteStore) AttachScorecard(ctx context.Context, scorecard Scorecard) error
func (*SQLiteStore) BuildEvolutionOverview ¶
func (s *SQLiteStore) BuildEvolutionOverview(ctx context.Context, skillID string, _ string) (*EvolutionOverview, error)
func (*SQLiteStore) ClaimNextGroupItem ¶
func (s *SQLiteStore) ClaimNextGroupItem(ctx context.Context, groupID, workerID string, leaseTTL time.Duration, now time.Time) (*RunGroupItem, error)
func (*SQLiteStore) ClearDefaultBaseline ¶
func (s *SQLiteStore) ClearDefaultBaseline(ctx context.Context, evalSpecID string) error
func (*SQLiteStore) CountGroupItemsByStatuses ¶
func (s *SQLiteStore) CountGroupItemsByStatuses(ctx context.Context, groupID string, statuses []RunGroupItemStatus) (int, error)
func (*SQLiteStore) CreateBaseline ¶
func (s *SQLiteStore) CreateBaseline(ctx context.Context, baseline *Baseline) error
func (*SQLiteStore) CreateComparisonReport ¶
func (s *SQLiteStore) CreateComparisonReport(ctx context.Context, report *ComparisonReport) error
func (*SQLiteStore) CreateDataset ¶
func (s *SQLiteStore) CreateDataset(ctx context.Context, dataset *Dataset) error
func (*SQLiteStore) CreateDatasetVersion ¶
func (s *SQLiteStore) CreateDatasetVersion(ctx context.Context, version *DatasetVersion) error
func (*SQLiteStore) CreateEvalRun ¶
func (s *SQLiteStore) CreateEvalRun(ctx context.Context, evalRun *EvalRun) error
func (*SQLiteStore) CreateEvalSpec ¶
func (s *SQLiteStore) CreateEvalSpec(ctx context.Context, spec *EvalSpec) error
func (*SQLiteStore) CreateGroup ¶
func (s *SQLiteStore) CreateGroup(ctx context.Context, group *RunGroup) error
func (*SQLiteStore) CreateGroupItems ¶
func (s *SQLiteStore) CreateGroupItems(ctx context.Context, items []RunGroupItem) error
func (*SQLiteStore) CreateRun ¶
func (s *SQLiteStore) CreateRun(ctx context.Context, run *Run) error
func (*SQLiteStore) CreateSkillEvolutionCase ¶
func (s *SQLiteStore) CreateSkillEvolutionCase(ctx context.Context, evolutionCase *SkillEvolutionCase) error
func (*SQLiteStore) CreateSkillRevision ¶
func (s *SQLiteStore) CreateSkillRevision(ctx context.Context, revision *SkillRevision) error
func (*SQLiteStore) DeleteRun ¶
func (s *SQLiteStore) DeleteRun(ctx context.Context, id string) error
func (*SQLiteStore) FindDatasetByOwnerAndName ¶
func (*SQLiteStore) FindDatasetVersionByDatasetAndVersion ¶
func (s *SQLiteStore) FindDatasetVersionByDatasetAndVersion(ctx context.Context, datasetID, version string) (*DatasetVersion, error)
func (*SQLiteStore) FindEvalSpecByOwnerDatasetAndName ¶
func (*SQLiteStore) FindLatestSkillEvolutionCaseByDedupKey ¶
func (s *SQLiteStore) FindLatestSkillEvolutionCaseByDedupKey(ctx context.Context, skillID string, ownerUserID string, dedupKey string) (*SkillEvolutionCase, error)
func (*SQLiteStore) FindRunByMetadata ¶
func (*SQLiteStore) GetBaseline ¶
func (*SQLiteStore) GetComparisonReport ¶
func (s *SQLiteStore) GetComparisonReport(ctx context.Context, id string) (*ComparisonReport, error)
func (*SQLiteStore) GetDataset ¶
func (*SQLiteStore) GetDatasetVersion ¶
func (s *SQLiteStore) GetDatasetVersion(ctx context.Context, id string) (*DatasetVersion, error)
func (*SQLiteStore) GetEvalRun ¶
func (*SQLiteStore) GetEvalSpec ¶
func (*SQLiteStore) GetGroupItem ¶
func (s *SQLiteStore) GetGroupItem(ctx context.Context, id string) (*RunGroupItem, error)
func (*SQLiteStore) GetSkillEvolutionCase ¶
func (s *SQLiteStore) GetSkillEvolutionCase(ctx context.Context, id string) (*SkillEvolutionCase, error)
func (*SQLiteStore) GetSkillRevision ¶
func (s *SQLiteStore) GetSkillRevision(ctx context.Context, id string) (*SkillRevision, error)
func (*SQLiteStore) LatestScorecardForItem ¶
func (*SQLiteStore) ListArtifacts ¶
func (s *SQLiteStore) ListArtifacts(ctx context.Context, runID string) ([]ArtifactRef, error)
func (*SQLiteStore) ListBaselines ¶
func (s *SQLiteStore) ListBaselines(ctx context.Context, filter BaselineFilter) ([]Baseline, error)
func (*SQLiteStore) ListDatasetVersions ¶
func (s *SQLiteStore) ListDatasetVersions(ctx context.Context, datasetID string, limit int) ([]DatasetVersion, error)
func (*SQLiteStore) ListDatasets ¶
func (s *SQLiteStore) ListDatasets(ctx context.Context, filter DatasetFilter) ([]Dataset, error)
func (*SQLiteStore) ListEvalRuns ¶
func (s *SQLiteStore) ListEvalRuns(ctx context.Context, filter EvalRunFilter) ([]EvalRun, error)
func (*SQLiteStore) ListEvalSpecs ¶
func (s *SQLiteStore) ListEvalSpecs(ctx context.Context, filter EvalSpecFilter) ([]EvalSpec, error)
func (*SQLiteStore) ListEvents ¶
func (*SQLiteStore) ListGroupItems ¶
func (s *SQLiteStore) ListGroupItems(ctx context.Context, groupID string) ([]RunGroupItem, error)
func (*SQLiteStore) ListGroups ¶
func (s *SQLiteStore) ListGroups(ctx context.Context, filter RunGroupFilter) ([]RunGroup, error)
func (*SQLiteStore) ListScorecards ¶
func (*SQLiteStore) ListSkillDecisionHistory ¶
func (s *SQLiteStore) ListSkillDecisionHistory(ctx context.Context, filter SkillDecisionHistoryFilter) ([]SkillDecisionHistoryEntry, error)
func (*SQLiteStore) ListSkillEvolutionCases ¶
func (s *SQLiteStore) ListSkillEvolutionCases(ctx context.Context, filter SkillEvolutionCaseFilter) ([]SkillEvolutionCase, error)
func (*SQLiteStore) ListSkillRevisions ¶
func (s *SQLiteStore) ListSkillRevisions(ctx context.Context, filter SkillRevisionFilter) ([]SkillRevision, error)
func (*SQLiteStore) UpdateDataset ¶
func (s *SQLiteStore) UpdateDataset(ctx context.Context, dataset *Dataset) error
func (*SQLiteStore) UpdateEvalRun ¶
func (s *SQLiteStore) UpdateEvalRun(ctx context.Context, evalRun *EvalRun) error
func (*SQLiteStore) UpdateEvalSpec ¶
func (s *SQLiteStore) UpdateEvalSpec(ctx context.Context, spec *EvalSpec) error
func (*SQLiteStore) UpdateGroup ¶
func (s *SQLiteStore) UpdateGroup(ctx context.Context, group *RunGroup) error
func (*SQLiteStore) UpdateGroupItem ¶
func (s *SQLiteStore) UpdateGroupItem(ctx context.Context, item *RunGroupItem) error
func (*SQLiteStore) UpdateRun ¶
func (s *SQLiteStore) UpdateRun(ctx context.Context, run *Run) error
func (*SQLiteStore) UpdateRunIfMaterialStateMatches ¶
func (*SQLiteStore) UpdateSkillEvolutionCase ¶
func (s *SQLiteStore) UpdateSkillEvolutionCase(ctx context.Context, evolutionCase *SkillEvolutionCase) error
func (*SQLiteStore) UpdateSkillRevision ¶
func (s *SQLiteStore) UpdateSkillRevision(ctx context.Context, revision *SkillRevision) error
type ScoreVerdict ¶
type ScoreVerdict string
const (
ScoreVerdictPass ScoreVerdict = "pass"
ScoreVerdictFail ScoreVerdict = "fail"
ScoreVerdictPartial ScoreVerdict = "partial"
ScoreVerdictError ScoreVerdict = "error"
)
type Scorecard ¶
type Scorecard struct {
ID string `json:"id"`
GroupID string `json:"group_id"`
GroupItemID string `json:"group_item_id"`
RunID string `json:"run_id,omitempty"`
Mode ScoringMode `json:"mode"`
Verdict ScoreVerdict `json:"verdict"`
Score float64 `json:"score"`
BreakdownJSON string `json:"breakdown_json,omitempty"`
EvidenceJSON string `json:"evidence_json,omitempty"`
JudgeTraceJSON string `json:"judge_trace_json,omitempty"`
CreatedAt time.Time `json:"created_at"`
}
type ScoringMode ¶
type ScoringMode string
const (
ScoringModeRule ScoringMode = "rule"
ScoringModeJudge ScoringMode = "judge"
ScoringModeHybrid ScoringMode = "hybrid"
)
type SelectorCuratedAssets ¶
type SelectorCuratedAssets struct {
Dataset *Dataset `json:"dataset,omitempty"`
DatasetVersion *DatasetVersion `json:"dataset_version,omitempty"`
EvalSpec *EvalSpec `json:"eval_spec,omitempty"`
}
type SelectorGateCheck ¶
type SelectorGateMetrics ¶
type SelectorGateMetrics struct {
CaseCount int `json:"case_count"`
PassedCount int `json:"passed_count"`
PassRate float64 `json:"pass_rate"`
CriticalCaseCount int `json:"critical_case_count"`
CriticalPassedCount int `json:"critical_passed_count"`
CriticalPassRate float64 `json:"critical_pass_rate"`
RouteCaseCount int `json:"route_case_count"`
RouteAgreementCount int `json:"route_agreement_count"`
RouteAgreementRate float64 `json:"route_agreement_rate"`
RouteCompatibleCount int `json:"route_compatible_count"`
RouteCompatibleRate float64 `json:"route_compatible_rate"`
RouteImprovementCount int `json:"route_improvement_count"`
RouteDisagreementCount int `json:"route_disagreement_count"`
CriticalRegressionCount int `json:"critical_regression_count"`
BaseClarifyRate float64 `json:"base_clarify_rate"`
TargetClarifyRate float64 `json:"target_clarify_rate"`
ClarifyRateDelta float64 `json:"clarify_rate_delta"`
LocaleBreakdown map[string]SelectorGateSegmentMetrics `json:"locale_breakdown,omitempty"`
PrimaryRouteBreakdown map[string]SelectorGateSegmentMetrics `json:"primary_route_breakdown,omitempty"`
SelectedCanonicalSkillBreakdown map[string]int `json:"selected_canonical_skill_breakdown,omitempty"`
NativeSurfaceModeBreakdown map[string]int `json:"native_surface_mode_breakdown,omitempty"`
NativeSurfaceReasonBreakdown map[string]int `json:"native_surface_reason_breakdown,omitempty"`
ExecutionProfileBreakdown map[string]int `json:"execution_profile_breakdown,omitempty"`
ToolSurfaceAliasRewriteCount int `json:"tool_surface_alias_rewrite_count,omitempty"`
ToolSurfaceCacheInvalidationCount int `json:"tool_surface_cache_invalidation_count,omitempty"`
ToolSurfaceExecCutoverCount int `json:"tool_surface_exec_cutover_count,omitempty"`
}
type SelectorGateReport ¶
type SelectorGateReport struct {
TargetEvalRunID string `json:"target_eval_run_id"`
BaseEvalRunID string `json:"base_eval_run_id,omitempty"`
BaselineID string `json:"baseline_id,omitempty"`
ComparisonReportID string `json:"comparison_report_id,omitempty"`
Metrics SelectorGateMetrics `json:"metrics"`
Thresholds map[string]interface{} `json:"thresholds,omitempty"`
Checks []SelectorGateCheck `json:"checks,omitempty"`
Passed bool `json:"passed"`
CreatedAt time.Time `json:"created_at"`
}
type SelectorGateRequest ¶
type SelectorGateRequest struct {
BaseEvalRunID string `json:"base_eval_run_id,omitempty"`
BaselineID string `json:"baseline_id,omitempty"`
Thresholds SelectorGateThresholds `json:"thresholds,omitempty"`
}
type SelectorGateSegmentMetrics ¶
type SelectorGateSegmentMetrics struct {
CaseCount int `json:"case_count"`
RouteAgreementCount int `json:"route_agreement_count"`
RouteAgreementRate float64 `json:"route_agreement_rate"`
RouteCompatibleCount int `json:"route_compatible_count"`
RouteCompatibleRate float64 `json:"route_compatible_rate"`
RouteImprovementCount int `json:"route_improvement_count"`
RouteDisagreementCount int `json:"route_disagreement_count"`
CriticalCaseCount int `json:"critical_case_count"`
CriticalRegressionCount int `json:"critical_regression_count"`
BaseClarifyCount int `json:"base_clarify_count"`
BaseClarifyRate float64 `json:"base_clarify_rate"`
TargetClarifyCount int `json:"target_clarify_count"`
TargetClarifyRate float64 `json:"target_clarify_rate"`
ClarifyRateDelta float64 `json:"clarify_rate_delta"`
}
type SelectorGateThresholds ¶
type SelectorGateThresholds struct {
MinPassRate *float64 `json:"min_pass_rate,omitempty"`
MinCriticalPassRate *float64 `json:"min_critical_pass_rate,omitempty"`
MinRouteAgreementRate *float64 `json:"min_route_agreement_rate,omitempty"`
MinRouteCompatibleRate *float64 `json:"min_route_compatible_rate,omitempty"`
MaxClarifyRateDelta *float64 `json:"max_clarify_rate_delta,omitempty"`
MaxCriticalRegressionCount *int `json:"max_critical_regression_count,omitempty"`
}
func DefaultSelectorGateThresholds ¶
func DefaultSelectorGateThresholds() SelectorGateThresholds
type SkillCutoverBudgetCheck ¶
type SkillCutoverBudgetMetrics ¶
type SkillCutoverBudgetMetrics struct {
CaseCount int `json:"case_count"`
ComparableCaseCount int `json:"comparable_case_count"`
MissingSurfaceCaseCount int `json:"missing_surface_case_count"`
BaseMedianToolCount float64 `json:"base_median_tool_count"`
TargetMedianToolCount float64 `json:"target_median_tool_count"`
BaseMedianSchemaBytes float64 `json:"base_median_schema_bytes"`
TargetMedianSchemaBytes float64 `json:"target_median_schema_bytes"`
MedianSchemaByteReductionRate float64 `json:"median_schema_byte_reduction_rate"`
BaseMedianLatencyMs float64 `json:"base_median_latency_ms"`
TargetMedianLatencyMs float64 `json:"target_median_latency_ms"`
MedianLatencyIncreaseRate float64 `json:"median_latency_increase_rate"`
AllowedFinalNativeTools []string `json:"allowed_final_native_tools,omitempty"`
AllowedFinalNativeToolCases int `json:"allowed_final_native_tool_cases"`
AllowedFinalNativeToolCaseRate float64 `json:"allowed_final_native_tool_case_rate"`
NonAllowedNativeToolCaseCount int `json:"non_allowed_native_tool_case_count"`
SelectedCanonicalSkillBreakdown map[string]int `json:"selected_canonical_skill_breakdown,omitempty"`
NativeSurfaceModeBreakdown map[string]int `json:"native_surface_mode_breakdown,omitempty"`
NativeSurfaceReasonBreakdown map[string]int `json:"native_surface_reason_breakdown,omitempty"`
ExecutionProfileBreakdown map[string]int `json:"execution_profile_breakdown,omitempty"`
ToolSurfaceAliasRewriteCount int `json:"tool_surface_alias_rewrite_count,omitempty"`
ToolSurfaceCacheInvalidationCount int `json:"tool_surface_cache_invalidation_count,omitempty"`
ToolSurfaceExecCutoverCount int `json:"tool_surface_exec_cutover_count,omitempty"`
}
type SkillCutoverBudgetReport ¶
type SkillCutoverBudgetReport struct {
TargetEvalRunID string `json:"target_eval_run_id"`
BaseEvalRunID string `json:"base_eval_run_id,omitempty"`
BaselineID string `json:"baseline_id,omitempty"`
Metrics SkillCutoverBudgetMetrics `json:"metrics"`
Thresholds map[string]interface{} `json:"thresholds,omitempty"`
Checks []SkillCutoverBudgetCheck `json:"checks,omitempty"`
Passed bool `json:"passed"`
CreatedAt time.Time `json:"created_at"`
}
type SkillCutoverBudgetRequest ¶
type SkillCutoverBudgetRequest struct {
BaseEvalRunID string `json:"base_eval_run_id,omitempty"`
BaselineID string `json:"baseline_id,omitempty"`
Thresholds SkillCutoverBudgetThresholds `json:"thresholds,omitempty"`
}
type SkillCutoverBudgetThresholds ¶
type SkillCutoverBudgetThresholds struct {
MinMedianSchemaByteReductionRate *float64 `json:"min_median_schema_byte_reduction_rate,omitempty"`
MaxMedianLatencyIncreaseRate *float64 `json:"max_median_latency_increase_rate,omitempty"`
AllowedFinalNativeTools []string `json:"allowed_final_native_tools,omitempty"`
}
type SkillCutoverLaneAssessment ¶
type SkillCutoverLaneAssessment struct {
EvalRunID string `json:"eval_run_id"`
CandidateID string `json:"candidate_id,omitempty"`
Title string `json:"title,omitempty"`
CreatedAt time.Time `json:"created_at"`
Passed bool `json:"passed"`
BaseEvalRunID string `json:"base_eval_run_id,omitempty"`
BaselineID string `json:"baseline_id,omitempty"`
ComparisonReportID string `json:"comparison_report_id,omitempty"`
FailedChecks []string `json:"failed_checks,omitempty"`
}
type SkillCutoverLaneReadiness ¶
type SkillCutoverLaneReadiness struct {
EvalSpecID string `json:"eval_spec_id,omitempty"`
CandidateID string `json:"candidate_id,omitempty"`
RequiredConsecutiveRuns int `json:"required_consecutive_runs"`
CandidateRunCount int `json:"candidate_run_count"`
ConsecutivePassCount int `json:"consecutive_pass_count"`
Ready bool `json:"ready"`
BaseEvalRunID string `json:"base_eval_run_id,omitempty"`
BaselineID string `json:"baseline_id,omitempty"`
Error string `json:"error,omitempty"`
Assessments []SkillCutoverLaneAssessment `json:"assessments,omitempty"`
}
type SkillCutoverReadinessReport ¶
type SkillCutoverReadinessReport struct {
CandidateID string `json:"candidate_id,omitempty"`
RequiredConsecutiveRuns int `json:"required_consecutive_runs"`
EvaluatedGatesReady bool `json:"evaluated_gates_ready"`
Ready bool `json:"ready"`
Selector SkillCutoverLaneReadiness `json:"selector"`
Execution SkillCutoverLaneReadiness `json:"execution"`
Budget SkillCutoverLaneReadiness `json:"budget"`
UnverifiedRequirements []string `json:"unverified_requirements,omitempty"`
BlockingReasons []string `json:"blocking_reasons,omitempty"`
CreatedAt time.Time `json:"created_at"`
}
type SkillCutoverReadinessRequest ¶
type SkillCutoverReadinessRequest struct {
OwnerUserID string `json:"owner_user_id,omitempty"`
CandidateID string `json:"candidate_id,omitempty"`
SelectorEvalSpecID string `json:"selector_eval_spec_id,omitempty"`
ExecutionEvalSpecID string `json:"execution_eval_spec_id,omitempty"`
RequiredConsecutiveRuns int `json:"required_consecutive_runs,omitempty"`
MaxAssessments int `json:"max_assessments,omitempty"`
Selector SelectorGateRequest `json:"selector,omitempty"`
Execution ExecutionEquivalenceRequest `json:"execution,omitempty"`
Budget SkillCutoverBudgetRequest `json:"budget,omitempty"`
}
type SkillDecisionHistoryEntry ¶
type SkillDecisionHistoryEntry struct {
RevisionID string `json:"revision_id"`
SkillID string `json:"skill_id"`
Status SkillRevisionStatus `json:"status"`
SourcePath string `json:"source_path,omitempty"`
CandidateID string `json:"candidate_id,omitempty"`
BaseContentSHA256 string `json:"base_content_sha256,omitempty"`
OriginCaseID string `json:"origin_case_id,omitempty"`
ParentRevisionID string `json:"parent_revision_id,omitempty"`
BackupOfRevisionID string `json:"backup_of_revision_id,omitempty"`
EvalRunID string `json:"eval_run_id,omitempty"`
OptimizationRunID string `json:"optimization_run_id,omitempty"`
FollowupGate string `json:"followup_gate,omitempty"`
OptimizationSurface OptimizationSurface `json:"optimization_surface,omitempty"`
DecisionAction SkillRevisionDecisionAction `json:"decision_action,omitempty"`
ReviewNote string `json:"review_note,omitempty"`
ReviewedBy string `json:"reviewed_by,omitempty"`
DecisionLog map[string]interface{} `json:"decision_log,omitempty"`
DecisionLogJSON string `json:"decision_log_json,omitempty"`
DecisionAt time.Time `json:"decision_at"`
CreatedAt time.Time `json:"created_at"`
ReviewedAt *time.Time `json:"reviewed_at,omitempty"`
PromotedAt *time.Time `json:"promoted_at,omitempty"`
}
type SkillDecisionHistoryFilter ¶
type SkillDecisionHistoryFilter struct {
SkillID string `json:"skill_id,omitempty"`
Actions []SkillRevisionDecisionAction `json:"actions,omitempty"`
Limit int `json:"limit,omitempty"`
}
type SkillEvolutionCase ¶
type SkillEvolutionCase struct {
ID string `json:"id"`
SkillID string `json:"skill_id"`
OwnerUserID string `json:"owner_user_id,omitempty"`
Mode SkillEvolutionMode `json:"mode"`
Reason SkillEvolutionReason `json:"reason"`
SourceKind string `json:"source_kind,omitempty"`
SourceID string `json:"source_id,omitempty"`
CandidateID string `json:"candidate_id,omitempty"`
BaseContentSHA256 string `json:"base_content_sha256,omitempty"`
FailureSignature string `json:"failure_signature,omitempty"`
DedupKey string `json:"dedup_key,omitempty"`
Summary string `json:"summary,omitempty"`
EvidenceJSON string `json:"evidence_json,omitempty"`
RevisionID string `json:"revision_id,omitempty"`
Status SkillEvolutionCaseStatus `json:"status"`
SkippedReason string `json:"skipped_reason,omitempty"`
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
}
type SkillEvolutionCaseDetail ¶
type SkillEvolutionCaseDetail struct {
SkillEvolutionCase
LinkedRevision *SkillRevision `json:"linked_revision,omitempty"`
SourceRunID string `json:"source_run_id,omitempty"`
SourceRun *Run `json:"source_run,omitempty"`
SourceEvalRunID string `json:"source_eval_run_id,omitempty"`
SourceEvalRun *EvalRun `json:"source_eval_run,omitempty"`
LinkedEvalRunID string `json:"linked_eval_run_id,omitempty"`
LinkedEvalRun *EvalRun `json:"linked_eval_run,omitempty"`
}
type SkillEvolutionCaseFilter ¶
type SkillEvolutionCaseFilter struct {
SkillID string `json:"skill_id,omitempty"`
OwnerUserID string `json:"owner_user_id,omitempty"`
CandidateID string `json:"candidate_id,omitempty"`
RevisionID string `json:"revision_id,omitempty"`
Statuses []SkillEvolutionCaseStatus `json:"statuses,omitempty"`
Limit int `json:"limit,omitempty"`
}
type SkillEvolutionCaseSpec ¶
type SkillEvolutionCaseSpec struct {
ID string `json:"id,omitempty"`
SkillID string `json:"skill_id"`
OwnerUserID string `json:"owner_user_id,omitempty"`
Mode SkillEvolutionMode `json:"mode"`
Reason SkillEvolutionReason `json:"reason"`
SourceKind string `json:"source_kind,omitempty"`
SourceID string `json:"source_id,omitempty"`
CandidateID string `json:"candidate_id,omitempty"`
BaseContentSHA256 string `json:"base_content_sha256,omitempty"`
FailureSignature string `json:"failure_signature,omitempty"`
Summary string `json:"summary,omitempty"`
EvidenceJSON string `json:"evidence_json,omitempty"`
}
type SkillEvolutionCaseStatus ¶
type SkillEvolutionCaseStatus string
const (
SkillEvolutionCaseStatusOpen SkillEvolutionCaseStatus = "open"
SkillEvolutionCaseStatusCandidateCreated SkillEvolutionCaseStatus = "candidate_created"
SkillEvolutionCaseStatusAccepted SkillEvolutionCaseStatus = "accepted"
SkillEvolutionCaseStatusRejected SkillEvolutionCaseStatus = "rejected"
SkillEvolutionCaseStatusPromoted SkillEvolutionCaseStatus = "promoted"
SkillEvolutionCaseStatusSkipped SkillEvolutionCaseStatus = "skipped"
)
type SkillEvolutionMode ¶
type SkillEvolutionMode string
const (
SkillEvolutionModeFix SkillEvolutionMode = "fix"
SkillEvolutionModeCapture SkillEvolutionMode = "capture"
)
type SkillEvolutionReason ¶
type SkillEvolutionReason string
const (
SkillEvolutionReasonRuntimeFailure SkillEvolutionReason = "runtime_failure"
SkillEvolutionReasonRuntimeCapture SkillEvolutionReason = "runtime_capture"
SkillEvolutionReasonSelectorGateFailed SkillEvolutionReason = "selector_gate_failed"
SkillEvolutionReasonExecutionGateFailed SkillEvolutionReason = "execution_gate_failed"
SkillEvolutionReasonBudgetGateFailed SkillEvolutionReason = "budget_gate_failed"
SkillEvolutionReasonManual SkillEvolutionReason = "manual"
)
type SkillOptimizeRequest ¶
type SkillPromoteResult ¶
type SkillRevision ¶
type SkillRevision struct {
ID string `json:"id"`
SkillID string `json:"skill_id"`
Status SkillRevisionStatus `json:"status"`
SourcePath string `json:"source_path,omitempty"`
CandidateID string `json:"candidate_id,omitempty"`
BaseContentSHA256 string `json:"base_content_sha256,omitempty"`
OriginCaseID string `json:"origin_case_id,omitempty"`
ParentRevisionID string `json:"parent_revision_id,omitempty"`
BackupOfRevisionID string `json:"backup_of_revision_id,omitempty"`
EvalRunID string `json:"eval_run_id,omitempty"`
OptimizationRunID string `json:"optimization_run_id,omitempty"`
FollowupGate string `json:"followup_gate,omitempty"`
OptimizationSurface OptimizationSurface `json:"optimization_surface,omitempty"`
DecisionAction SkillRevisionDecisionAction `json:"decision_action,omitempty"`
ReviewNote string `json:"review_note,omitempty"`
ReviewedBy string `json:"reviewed_by,omitempty"`
DecisionLogJSON string `json:"decision_log_json,omitempty"`
Content string `json:"content,omitempty"`
ContentSHA256 string `json:"content_sha256,omitempty"`
CreatedAt time.Time `json:"created_at"`
ReviewedAt *time.Time `json:"reviewed_at,omitempty"`
PromotedAt *time.Time `json:"promoted_at,omitempty"`
}
type SkillRevisionDecisionAction ¶
type SkillRevisionDecisionAction string
const (
SkillRevisionDecisionActionPromote SkillRevisionDecisionAction = "promote"
SkillRevisionDecisionActionRollback SkillRevisionDecisionAction = "rollback"
)
type SkillRevisionFilter ¶
type SkillRevisionFilter struct {
SkillID string `json:"skill_id,omitempty"`
Statuses []SkillRevisionStatus `json:"statuses,omitempty"`
Limit int `json:"limit,omitempty"`
}
type SkillRevisionPromotionRecorder ¶
type SkillRevisionPromotionRecorder interface {
RecordSkillRevisionPromotion(ctx context.Context, promotedRevision *SkillRevision, backupRevision *SkillRevision, writtenSourcePath string) error
}
type SkillRevisionStatus ¶
type SkillRevisionStatus string
const (
SkillRevisionStatusCandidate SkillRevisionStatus = "candidate"
SkillRevisionStatusAccepted SkillRevisionStatus = "accepted"
SkillRevisionStatusRejected SkillRevisionStatus = "rejected"
SkillRevisionStatusPromoted SkillRevisionStatus = "promoted"
SkillRevisionStatusBackup SkillRevisionStatus = "backup"
)
type SnapshotDriver ¶
type Store ¶
type Store interface {
CreateRun(ctx context.Context, run *Run) error
UpdateRun(ctx context.Context, run *Run) error
GetRun(ctx context.Context, id string) (*Run, error)
ListRuns(ctx context.Context, filter RunFilter) ([]Run, error)
AppendEvent(ctx context.Context, event RunEvent) error
ListEvents(ctx context.Context, runID string, limit int) ([]RunEvent, error)
AttachArtifact(ctx context.Context, ref ArtifactRef) error
ListArtifacts(ctx context.Context, runID string) ([]ArtifactRef, error)
}
type UserTaskActions ¶
type UserTaskActions struct {
Items []ActionDescriptor `json:"items,omitempty"`
}
type UserTaskArtifact ¶
type UserTaskBlocker ¶
type UserTaskProjection ¶
type UserTaskProjection struct {
ID string `json:"id"`
Kind RunKind `json:"kind"`
ConversationID string `json:"conversation_id,omitempty"`
Scope string `json:"scope"`
Title string `json:"title"`
Subtitle string `json:"subtitle,omitempty"`
Status string `json:"status"`
Stage string `json:"stage"`
Progress int `json:"progress"`
Blocker *UserTaskBlocker `json:"blocker,omitempty"`
ResultPreview string `json:"result_preview,omitempty"`
ErrorPreview string `json:"error_preview,omitempty"`
Artifacts []UserTaskArtifact `json:"artifacts,omitempty"`
ResearchSources []UserTaskResearchSource `json:"research_sources,omitempty"`
SubagentSummary *UserTaskSubagentSummary `json:"subagent_summary,omitempty"`
Actions UserTaskActions `json:"actions"`
RunStatus string `json:"run_status,omitempty"`
VerificationStatus string `json:"verification_status,omitempty"`
Score *float64 `json:"score,omitempty"`
EvidenceCount *int `json:"evidence_count,omitempty"`
DetailHref string `json:"detail_href,omitempty"`
UpdatedAt time.Time `json:"updated_at"`
FinishedAt *time.Time `json:"finished_at,omitempty"`
}
type UserTaskProjectionHandler ¶
type UserTaskProjectionHandler struct {
// contains filtered or unexported fields
}
func NewUserTaskProjectionHandler ¶
func NewUserTaskProjectionHandler(manager *Controller, detailProvider RunDetailProvider) *UserTaskProjectionHandler
func (*UserTaskProjectionHandler) CancelTask ¶
func (h *UserTaskProjectionHandler) CancelTask(c echo.Context) error
func (*UserTaskProjectionHandler) GetTask ¶
func (h *UserTaskProjectionHandler) GetTask(c echo.Context) error
func (*UserTaskProjectionHandler) PerformTaskAction ¶
func (h *UserTaskProjectionHandler) PerformTaskAction(c echo.Context) error
func (*UserTaskProjectionHandler) RegisterRoutes ¶
func (h *UserTaskProjectionHandler) RegisterRoutes(g *echo.Group)
func (*UserTaskProjectionHandler) ResumeTask ¶
func (h *UserTaskProjectionHandler) ResumeTask(c echo.Context) error
type UserTaskProjectionService ¶
type UserTaskProjectionService struct {
// contains filtered or unexported fields
}
func NewUserTaskProjectionService ¶
func NewUserTaskProjectionService(manager *Controller, detailProvider RunDetailProvider) *UserTaskProjectionService
func (*UserTaskProjectionService) Get ¶
func (s *UserTaskProjectionService) Get(ctx context.Context, runID string, scope string) (*UserTaskProjection, error)
func (*UserTaskProjectionService) List ¶
func (s *UserTaskProjectionService) List(ctx context.Context, filter UserTaskProjectionFilter) ([]UserTaskProjection, error)
type UserTaskResearchSource ¶
type UserTaskResearchSource struct {
Title string `json:"title"`
URL string `json:"url,omitempty"`
Domain string `json:"domain,omitempty"`
SourceType string `json:"source_type,omitempty"`
PublishedAt string `json:"published_at,omitempty"`
FetchedAt string `json:"fetched_at,omitempty"`
RelevanceScore float64 `json:"relevance_score,omitempty"`
CredibilityScore float64 `json:"credibility_score,omitempty"`
}
type UserTaskSubagentSummary ¶
type UserTaskSubagentSummary struct {
Total int `json:"total"`
Running int `json:"running,omitempty"`
WaitingUser int `json:"waiting_user,omitempty"`
Completed int `json:"completed,omitempty"`
Failed int `json:"failed,omitempty"`
Cancelled int `json:"cancelled,omitempty"`
LatestTitle string `json:"latest_title,omitempty"`
LatestStatus string `json:"latest_status,omitempty"`
LatestUpdatedAt time.Time `json:"latest_updated_at,omitempty"`
}
type WritePathGuard ¶
type WritePathGuard struct {
// contains filtered or unexported fields
}
func NewWritePathGuard ¶
func NewWritePathGuard(manager *Controller) *WritePathGuard
func (*WritePathGuard) CheckWritePath ¶
func (g *WritePathGuard) CheckWritePath(ctx context.Context, absPath string) error
Source Files ¶
- agent_compat_handler.go
- batch_trajectory_runner.go
- budget_gate.go
- concurrency_config.go
- contract.go
- cua_benchmark_profile.go
- cua_benchmark_report.go
- cutover_readiness.go
- dataset_bundle_defaults.go
- dataset_bundle_loader.go
- desktop_chat_dataset.go
- discovery_observability.go
- eval_compare.go
- eval_manager.go
- eval_store.go
- eval_types.go
- evolution_overview.go
- exec_guard.go
- execution_dataset.go
- execution_gate.go
- execution_types.go
- group_dispatcher.go
- group_manager.go
- group_promotion.go
- group_verifier.go
- handler.go
- integrations.go
- manager.go
- migration.go
- optimization_orchestrator.go
- policy.go
- proposal_intake.go
- run_actions.go
- run_context.go
- runtime_observer.go
- runtime_pipeline.go
- runtime_reflection.go
- runtime_reflection_records.go
- runtime_skill_evolution.go
- scoring.go
- selector_dataset.go
- selector_gate.go
- skill_candidate_middleware.go
- skill_evolution.go
- skill_revision.go
- sqlite_busy_driver_cgo.go
- sqlite_busy_test_cgo.go
- store.go
- subagent_executor.go
- task_projection.go
- task_projection_handler.go
- trace_provider.go
- tracing.go
- types.go
- write_guard.go