Documentation
¶
Overview ¶
Package cli implements the command-line interface for RagTune. It provides commands for ingesting documents, explaining retrieval results, running simulations, auditing RAG quality, and comparing configurations.
Package cli implements the command-line interface for RagTune. It provides commands for document ingestion, retrieval explanation, simulation, auditing, and configuration comparison.
Index ¶
- Variables
- func Execute() error
- type AuditError
- type CICheckError
- type CollectionResult
- type CompareResult
- type ConfigResult
- type Document
- type GoldenQueries
- type GoldenQuery
- type JSONBaseline
- type JSONBootstrap
- type JSONComparison
- type JSONMetrics
- type JSONOutput
- type JSONQueryFailure
- type JSONThresholdCheck
- type JSONThresholds
- type MetricDelta
- type QueryFailure
- type RegressionError
- type RunResult
Constants ¶
This section is empty.
Variables ¶
var (
	// ErrAuditFailed indicates one or more audit thresholds were not met.
	ErrAuditFailed = errors.New("audit failed")
	// ErrCICheckFailed indicates CI threshold checks did not pass.
	ErrCICheckFailed = errors.New("CI check failed")
	// ErrValidation indicates invalid input parameters.
	ErrValidation = errors.New("validation error")
)
Sentinel errors for CLI operations. These allow callers to check specific error conditions with errors.Is().
var Version = "dev"
Version is set at build time via -ldflags; it defaults to "dev".
Functions ¶
func Execute ¶
func Execute() error
Types ¶
type AuditError ¶ added in v0.1.1
type AuditError struct {
FailCount int
}
AuditError provides details about audit failures.
func (*AuditError) Error ¶ added in v0.1.1
func (e *AuditError) Error() string
func (*AuditError) Unwrap ¶ added in v0.1.1
func (e *AuditError) Unwrap() error
type CICheckError ¶ added in v0.1.1
type CICheckError struct {
FailedChecks []string
}
CICheckError provides details about CI check failures.
func (*CICheckError) Error ¶ added in v0.1.1
func (e *CICheckError) Error() string
func (*CICheckError) Unwrap ¶ added in v0.1.1
func (e *CICheckError) Unwrap() error
type CollectionResult ¶
type CollectionResult struct {
Collection string `json:"collection"`
Metrics metrics.Result `json:"metrics"`
QueryResults []metrics.QueryResult `json:"query_results,omitempty"`
}
CollectionResult holds results for a single collection.
type CompareResult ¶
type CompareResult struct {
Timestamp string `json:"timestamp"`
Collections []CollectionResult `json:"collections"`
}
CompareResult holds comparison results across collections.
type ConfigResult ¶
type ConfigResult struct {
Config config.SimConfig `json:"config"`
Metrics metrics.Result `json:"metrics"`
Bootstrap *metrics.BootstrapResult `json:"bootstrap,omitempty"`
QueryResults []metrics.QueryResult `json:"query_results"`
}
ConfigResult represents results for a single configuration.
type GoldenQueries ¶
type GoldenQueries struct {
Queries []GoldenQuery `json:"queries"`
}
GoldenQueries represents the golden queries file format.
type GoldenQuery ¶
type GoldenQuery struct {
ID string `json:"id"`
Text string `json:"text"`
RelevantDocs []string `json:"relevant_docs"`
}
GoldenQuery represents a single golden query.
type JSONBaseline ¶ added in v0.2.0
type JSONBaseline struct {
Timestamp string `json:"timestamp"`
Comparisons []JSONComparison `json:"comparisons"`
Regressed bool `json:"regressed"`
Regressions []string `json:"regressions,omitempty"`
}
JSONBaseline contains baseline comparison data.
type JSONBootstrap ¶ added in v0.3.0
type JSONBootstrap struct {
N int `json:"n"`
RecallMean float64 `json:"recall_mean"`
RecallStd float64 `json:"recall_std"`
RecallCI95Lo float64 `json:"recall_ci95_lo"`
RecallCI95Hi float64 `json:"recall_ci95_hi"`
MRRMean float64 `json:"mrr_mean"`
MRRStd float64 `json:"mrr_std"`
MRRCI95Lo float64 `json:"mrr_ci95_lo"`
MRRCI95Hi float64 `json:"mrr_ci95_hi"`
NDCGMean float64 `json:"ndcg_mean"`
NDCGStd float64 `json:"ndcg_std"`
CoverageMean float64 `json:"coverage_mean"`
CoverageStd float64 `json:"coverage_std"`
}
JSONBootstrap contains bootstrap confidence interval data.
type JSONComparison ¶ added in v0.2.0
type JSONComparison struct {
Metric string `json:"metric"`
Baseline float64 `json:"baseline"`
Current float64 `json:"current"`
Delta float64 `json:"delta"`
DeltaPct float64 `json:"delta_pct"`
Regressed bool `json:"regressed"`
}
JSONComparison represents a single metric comparison.
type JSONMetrics ¶ added in v0.2.0
type JSONMetrics struct {
RecallAtK float64 `json:"recall_at_k"`
MRR float64 `json:"mrr"`
NDCGAtK float64 `json:"ndcg_at_k"`
Coverage float64 `json:"coverage"`
Redundancy float64 `json:"redundancy"`
LatencyP50 float64 `json:"latency_p50_ms"`
LatencyP95 float64 `json:"latency_p95_ms"`
LatencyP99 float64 `json:"latency_p99_ms"`
LatencyAvg float64 `json:"latency_avg_ms"`
QueryCount int `json:"query_count"`
TopK int `json:"top_k"`
}
JSONMetrics contains the core metrics in JSON format.
type JSONOutput ¶ added in v0.2.0
type JSONOutput struct {
Status string `json:"status"` // "pass" or "fail"
Timestamp string `json:"timestamp"`
Collection string `json:"collection"`
Store string `json:"store"`
Metrics JSONMetrics `json:"metrics"`
Bootstrap *JSONBootstrap `json:"bootstrap,omitempty"`
Baseline *JSONBaseline `json:"baseline,omitempty"`
Thresholds *JSONThresholds `json:"thresholds,omitempty"`
Failures []JSONQueryFailure `json:"failures,omitempty"`
RunFile string `json:"run_file"`
}
JSONOutput represents the machine-readable output format for CI pipelines.
type JSONQueryFailure ¶ added in v0.2.0
type JSONQueryFailure struct {
QueryID string `json:"query_id"`
Query string `json:"query"`
ExpectedDocs []string `json:"expected_docs"`
RetrievedDocs []string `json:"retrieved_docs"`
}
JSONQueryFailure represents a failed query in JSON output.
type JSONThresholdCheck ¶ added in v0.2.0
type JSONThresholdCheck struct {
Metric string `json:"metric"`
Value float64 `json:"value"`
Threshold float64 `json:"threshold"`
Passed bool `json:"passed"`
}
JSONThresholdCheck represents a single threshold check.
type JSONThresholds ¶ added in v0.2.0
type JSONThresholds struct {
Checks []JSONThresholdCheck `json:"checks"`
Passed bool `json:"passed"`
}
JSONThresholds contains CI threshold results.
type MetricDelta ¶ added in v0.2.0
type MetricDelta struct {
Name string
Baseline float64
Current float64
Delta float64
DeltaPct float64
Regressed bool
HigherGood bool // true for recall/mrr/coverage, false for latency
}
MetricDelta represents the change in a metric from baseline to current.
type QueryFailure ¶
type QueryFailure struct {
QueryID string
Query string
RelevantDocs []string
RetrievedDocs []string
TopScores []float32
Recall float64
}
QueryFailure represents a query that failed to retrieve its relevant documents.
type RegressionError ¶ added in v0.2.0
type RegressionError struct {
Regressions []string
}
RegressionError is returned when metrics regress and --fail-on-regression is set.
func (*RegressionError) Error ¶ added in v0.2.0
func (e *RegressionError) Error() string