cli

package
v0.4.0 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 25, 2026 License: MIT Imports: 24 Imported by: 0

Documentation

Overview

Package cli implements the command-line interface for RagTune. It provides commands for ingesting documents, explaining retrieval results, running simulations, auditing RAG quality, and comparing configurations.

Index

Constants

This section is empty.

Variables

View Source
// Sentinel errors for CLI operations. Callers can test for a specific
// failure condition with errors.Is.
var (
	// ErrAuditFailed indicates one or more audit thresholds were not met.
	ErrAuditFailed = errors.New("audit failed")

	// ErrCICheckFailed indicates CI threshold checks did not pass.
	ErrCICheckFailed = errors.New("CI check failed")

	// ErrValidation indicates invalid input parameters.
	ErrValidation = errors.New("validation error")
)

Sentinel errors for CLI operations. These allow callers to check specific error conditions with errors.Is().

View Source
// Version defaults to "dev" and is set at build time via -ldflags.
var Version = "dev"

Version is set at build time via -ldflags; it defaults to "dev" when no value is injected.

Functions

func Execute

func Execute() error

Execute runs the root command and returns the resulting error, if any.

Types

type AuditError added in v0.1.1

// AuditError provides details about audit failures.
//
// *AuditError has Error and Unwrap methods, so it can be matched with
// errors.As. NOTE(review): Unwrap presumably returns ErrAuditFailed so that
// errors.Is(err, ErrAuditFailed) succeeds — confirm against the implementation.
//
// FailCount is presumably the number of audit checks that failed — confirm
// against the code that constructs this error.
type AuditError struct {
	FailCount int
}

AuditError provides details about audit failures.

func (*AuditError) Error added in v0.1.1

func (e *AuditError) Error() string

func (*AuditError) Unwrap added in v0.1.1

func (e *AuditError) Unwrap() error

type CICheckError added in v0.1.1

// CICheckError provides details about CI check failures.
//
// *CICheckError has Error and Unwrap methods, so it can be matched with
// errors.As. NOTE(review): Unwrap presumably returns ErrCICheckFailed so that
// errors.Is(err, ErrCICheckFailed) succeeds — confirm against the
// implementation.
//
// FailedChecks presumably lists one entry per failed check; the entry format
// is not visible here — confirm against the code that constructs this error.
type CICheckError struct {
	FailedChecks []string
}

CICheckError provides details about CI check failures.

func (*CICheckError) Error added in v0.1.1

func (e *CICheckError) Error() string

func (*CICheckError) Unwrap added in v0.1.1

func (e *CICheckError) Unwrap() error

type CollectionResult

// CollectionResult holds results for a single collection.
//
// It serializes to JSON with the keys "collection", "metrics" and
// "query_results"; "query_results" is omitted when QueryResults is empty.
type CollectionResult struct {
	Collection   string                `json:"collection"`
	Metrics      metrics.Result        `json:"metrics"`
	QueryResults []metrics.QueryResult `json:"query_results,omitempty"`
}

CollectionResult holds results for a single collection.

type CompareResult

// CompareResult holds comparison results across collections.
//
// It serializes to JSON with the keys "timestamp" and "collections", the
// latter carrying one CollectionResult per element. Timestamp is a plain
// string; its format is not fixed by this type.
type CompareResult struct {
	Timestamp   string             `json:"timestamp"`
	Collections []CollectionResult `json:"collections"`
}

CompareResult holds comparison results across collections.

type ConfigResult

// ConfigResult represents results for a single configuration.
//
// JSON encoding notes:
//   - Bootstrap is a pointer tagged omitempty, so "bootstrap" is omitted
//     when it is nil.
//   - QueryResults has no omitempty, so "query_results" is always emitted
//     (a nil slice encodes as null).
type ConfigResult struct {
	Config       config.SimConfig         `json:"config"`
	Metrics      metrics.Result           `json:"metrics"`
	Bootstrap    *metrics.BootstrapResult `json:"bootstrap,omitempty"`
	QueryResults []metrics.QueryResult    `json:"query_results"`
}

ConfigResult represents results for a single configuration.

type Document

// Document represents a loaded document.
//
// Path and Content are plain strings with no struct tags.
// NOTE(review): Path is presumably the source file path and Content the
// file's text — confirm against the loader.
type Document struct {
	Path    string
	Content string
}

Document represents a loaded document.

type GoldenQueries

// GoldenQueries represents the golden queries file format.
//
// In JSON it is an object with a single "queries" key holding an array of
// GoldenQuery entries.
type GoldenQueries struct {
	Queries []GoldenQuery `json:"queries"`
}

GoldenQueries represents the golden queries file format.

type GoldenQuery

// GoldenQuery represents a single golden query.
//
// It maps to a JSON object with the keys "id", "text" and "relevant_docs".
// NOTE(review): RelevantDocs presumably identifies the documents a correct
// retrieval should return; the identifier format (path, ID, ...) is not
// visible here — confirm against the evaluation code.
type GoldenQuery struct {
	ID           string   `json:"id"`
	Text         string   `json:"text"`
	RelevantDocs []string `json:"relevant_docs"`
}

GoldenQuery represents a single golden query.

type JSONBaseline added in v0.2.0

// JSONBaseline contains baseline comparison data.
//
// Comparisons carries one JSONComparison per element. Regressed and
// "regressions" are both always-present/optional respectively: "regressed"
// is always emitted, while "regressions" is omitted when the slice is empty.
// NOTE(review): Regressed is presumably true when any comparison regressed —
// confirm against the code that populates it.
type JSONBaseline struct {
	Timestamp   string           `json:"timestamp"`
	Comparisons []JSONComparison `json:"comparisons"`
	Regressed   bool             `json:"regressed"`
	Regressions []string         `json:"regressions,omitempty"`
}

JSONBaseline contains baseline comparison data.

type JSONBootstrap added in v0.3.0

// JSONBootstrap contains bootstrap confidence interval data.
//
// Recall and MRR each carry a mean, a standard deviation and a low/high
// CI95 pair; NDCG and coverage carry only a mean and a standard deviation.
// NOTE(review): N is presumably the number of bootstrap resamples, and the
// CI95 fields the bounds of a 95% confidence interval — confirm against the
// metrics package.
type JSONBootstrap struct {
	N            int     `json:"n"`
	RecallMean   float64 `json:"recall_mean"`
	RecallStd    float64 `json:"recall_std"`
	RecallCI95Lo float64 `json:"recall_ci95_lo"`
	RecallCI95Hi float64 `json:"recall_ci95_hi"`
	MRRMean      float64 `json:"mrr_mean"`
	MRRStd       float64 `json:"mrr_std"`
	MRRCI95Lo    float64 `json:"mrr_ci95_lo"`
	MRRCI95Hi    float64 `json:"mrr_ci95_hi"`
	NDCGMean     float64 `json:"ndcg_mean"`
	NDCGStd      float64 `json:"ndcg_std"`
	CoverageMean float64 `json:"coverage_mean"`
	CoverageStd  float64 `json:"coverage_std"`
}

JSONBootstrap contains bootstrap confidence interval data.

type JSONComparison added in v0.2.0

// JSONComparison represents a single metric comparison.
//
// It has the same value fields as MetricDelta (baseline, current, delta,
// delta percentage, regressed) with JSON tags, but names the metric via
// Metric rather than Name and has no HigherGood field.
// NOTE(review): Delta is presumably Current - Baseline and DeltaPct that
// change as a percentage of Baseline — confirm against the comparison code.
type JSONComparison struct {
	Metric    string  `json:"metric"`
	Baseline  float64 `json:"baseline"`
	Current   float64 `json:"current"`
	Delta     float64 `json:"delta"`
	DeltaPct  float64 `json:"delta_pct"`
	Regressed bool    `json:"regressed"`
}

JSONComparison represents a single metric comparison.

type JSONMetrics added in v0.2.0

// JSONMetrics contains the core metrics in JSON format.
//
// The four latency fields are emitted under keys ending in "_ms"
// (latency_p50_ms, latency_p95_ms, latency_p99_ms, latency_avg_ms).
// NOTE(review): the "_ms" suffix implies milliseconds, and TopK is presumably
// the K used by RecallAtK and NDCGAtK — confirm against the code that fills
// this struct.
type JSONMetrics struct {
	RecallAtK  float64 `json:"recall_at_k"`
	MRR        float64 `json:"mrr"`
	NDCGAtK    float64 `json:"ndcg_at_k"`
	Coverage   float64 `json:"coverage"`
	Redundancy float64 `json:"redundancy"`
	LatencyP50 float64 `json:"latency_p50_ms"`
	LatencyP95 float64 `json:"latency_p95_ms"`
	LatencyP99 float64 `json:"latency_p99_ms"`
	LatencyAvg float64 `json:"latency_avg_ms"`
	QueryCount int     `json:"query_count"`
	TopK       int     `json:"top_k"`
}

JSONMetrics contains the core metrics in JSON format.

type JSONOutput added in v0.2.0

// JSONOutput represents the machine-readable output format for CI pipelines.
//
// Always-present keys: "status", "timestamp", "collection", "store",
// "metrics" and "run_file". Optional keys: "bootstrap", "baseline" and
// "thresholds" are omitted when their pointers are nil, and "failures" is
// omitted when the slice is empty.
type JSONOutput struct {
	Status     string             `json:"status"` // "pass" or "fail"
	Timestamp  string             `json:"timestamp"`
	Collection string             `json:"collection"`
	Store      string             `json:"store"`
	Metrics    JSONMetrics        `json:"metrics"`
	Bootstrap  *JSONBootstrap     `json:"bootstrap,omitempty"`
	Baseline   *JSONBaseline      `json:"baseline,omitempty"`
	Thresholds *JSONThresholds    `json:"thresholds,omitempty"`
	Failures   []JSONQueryFailure `json:"failures,omitempty"`
	RunFile    string             `json:"run_file"`
}

JSONOutput represents the machine-readable output format for CI pipelines.

type JSONQueryFailure added in v0.2.0

// JSONQueryFailure represents a failed query in JSON output.
//
// It pairs the query (ID and text) with the documents that were expected
// ("expected_docs") and those actually retrieved ("retrieved_docs"). Both
// slices are always emitted; a nil slice encodes as null.
type JSONQueryFailure struct {
	QueryID       string   `json:"query_id"`
	Query         string   `json:"query"`
	ExpectedDocs  []string `json:"expected_docs"`
	RetrievedDocs []string `json:"retrieved_docs"`
}

JSONQueryFailure represents a failed query in JSON output.

type JSONThresholdCheck added in v0.2.0

// JSONThresholdCheck represents a single threshold check.
//
// It records the metric name, its observed Value, the Threshold it was
// compared against, and whether the check Passed.
// NOTE(review): the comparison direction (value >= threshold vs. <=) is not
// encoded here and presumably depends on the metric — confirm against the
// CI check code.
type JSONThresholdCheck struct {
	Metric    string  `json:"metric"`
	Value     float64 `json:"value"`
	Threshold float64 `json:"threshold"`
	Passed    bool    `json:"passed"`
}

JSONThresholdCheck represents a single threshold check.

type JSONThresholds added in v0.2.0

// JSONThresholds contains CI threshold results.
//
// Checks holds one JSONThresholdCheck per element.
// NOTE(review): Passed is presumably true only when every entry in Checks
// passed — confirm against the code that populates it.
type JSONThresholds struct {
	Checks []JSONThresholdCheck `json:"checks"`
	Passed bool                 `json:"passed"`
}

JSONThresholds contains CI threshold results.

type MetricDelta added in v0.2.0

// MetricDelta represents the change in a metric from baseline to current.
//
// The struct has no JSON tags; JSONComparison carries the same value fields
// with tags for serialized output. HigherGood records which direction counts
// as an improvement for the metric.
// NOTE(review): Delta is presumably Current - Baseline and DeltaPct that
// change as a percentage of Baseline — confirm against the comparison code.
type MetricDelta struct {
	Name       string
	Baseline   float64
	Current    float64
	Delta      float64
	DeltaPct   float64
	Regressed  bool
	HigherGood bool // true for recall/mrr/coverage, false for latency
}

MetricDelta represents the change in a metric from baseline to current.

type QueryFailure

// QueryFailure represents a query that failed to retrieve its relevant
// documents.
//
// The struct has no JSON tags. Besides the query and the relevant/retrieved
// document lists it carries TopScores (float32) and Recall (float64).
// NOTE(review): TopScores presumably holds the similarity scores of the
// retrieved documents in rank order, parallel to RetrievedDocs — confirm
// against the code that builds it.
type QueryFailure struct {
	QueryID       string
	Query         string
	RelevantDocs  []string
	RetrievedDocs []string
	TopScores     []float32
	Recall        float64
}

QueryFailure represents a query that failed to retrieve its relevant documents.

type RegressionError added in v0.2.0

// RegressionError is returned when metrics regress and --fail-on-regression
// is set.
//
// NOTE(review): unlike AuditError and CICheckError, only an Error method is
// listed for this type (no Unwrap), so callers would need errors.As rather
// than errors.Is against a sentinel to detect it — confirm this is intended.
//
// Regressions presumably holds one description per regressed metric; the
// entry format is not visible here.
type RegressionError struct {
	Regressions []string
}

RegressionError is returned when metrics regress and --fail-on-regression is set.

func (*RegressionError) Error added in v0.2.0

func (e *RegressionError) Error() string

type RunResult

// RunResult represents the complete simulation run output.
//
// It serializes to JSON with the keys "timestamp", "collection", "store" and
// "configs", where "configs" carries one ConfigResult per element. All keys
// are always emitted (no omitempty).
type RunResult struct {
	Timestamp  string         `json:"timestamp"`
	Collection string         `json:"collection"`
	Store      string         `json:"store"`
	Configs    []ConfigResult `json:"configs"`
}

RunResult represents the complete simulation run output.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL