query

package

v0.2.1 Latest Latest Go to latest Published: Jul 1, 2026 License: MIT Imports: 36 Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/peiman/vaultmind

Links

Open Source Insights

Documentation ¶

Overview ¶

Package query provides read-only vault diagnostics and search operations.

Index ¶

Constants
func BuildAutoRetriever(db *index.DB) (retrieval.Retriever, func(), error)
func BuildRetriever(mode string, db *index.DB) (retrieval.Retriever, func(), error)
func CollectTypeBreakdown(db *index.DB, cfg *vault.Config) (map[string]StatusTypeInfo, error)
func CosineSimilarity(a, b []float32) float64
func FormatAsk(result *AskResult, w io.Writer) error
func FormatAskExplain(result *AskResult, w io.Writer) error
func FormatAskPointersOnly(result *AskResult, w io.Writer) error
func FormatAskPreview(result *AskResult, w io.Writer) error
func FormatAskRead(result *AskResult, note *index.FullNote, w io.Writer) error
func FormatAskReadWithOptions(result *AskResult, note *index.FullNote, w io.Writer, explain bool) error
func FormatGitStatus(result *GitStatusResult, w io.Writer) error
func FuzzyTitleMatches(query string, titles []index.NoteTitle, n int) []index.NoteTitle
func MeshSurfacedCounts(m *DoctorMeshIdentity) (errs, warns int)
func NoteSimilarities(ctx context.Context, queryText string, embedder embedding.Embedder, ...) (map[string]float64, error)
func ResultSurfacedIssueCounts(result *DoctorResult) (errs, warns int)
func RunLinks(db *index.DB, cfg LinksConfig, w io.Writer) error
func RunNoteGet(db *index.DB, cfg NoteGetConfig, w io.Writer) error
func RunResolve(db *index.DB, input, vaultPath string, jsonOut bool, w io.Writer) error
func RunSelf(db *index.DB, cfg SelfConfig, w io.Writer) error
func SurfacedIssueCounts(issues DoctorIssues) (errors, warnings int)
func Truncate(s string, maxLen int) string
func WriteKeywordOnlyHint(w io.Writer, retrievalMode string, hitCount int) bool
func WriteTitleSuggestions(w io.Writer, matches []index.NoteTitle) bool
type ActivationReranker
- func (r *ActivationReranker) Search(ctx context.Context, query string, limit, offset int, ...) ([]retrieval.ScoredResult, int, error)
type ActivationRetriever
- func (r *ActivationRetriever) Search(_ context.Context, _ string, limit, offset int, filters index.SearchFilters) ([]retrieval.ScoredResult, int, error)
type ActivationScoreFn
type AskConfig
type AskResult
- func Ask(ctx context.Context, retriever retrieval.Retriever, resolver *graph.Resolver, ...) (*AskResult, error)
- func AskHits(ctx context.Context, retriever retrieval.Retriever, query string, ...) (*AskResult, error)
type AutoRetrieverResult
- func BuildAutoRetrieverFull(db *index.DB) AutoRetrieverResult
- func BuildAutoRetrieverWithActivation(db *index.DB, expDB *experiment.DB) AutoRetrieverResult
- func BuildAutoRetrieverWithRerank(db *index.DB, expDB *experiment.DB, alpha, beta float64) AutoRetrieverResult
type BothResult
- func CollectBoth(db *index.DB, cfg LinksConfig) (BothResult, string, error)
type ColBERTEmbedFunc
type ColBERTRetriever
- func (r *ColBERTRetriever) Search(ctx context.Context, query string, limit, offset int, ...) ([]retrieval.ScoredResult, int, error)
type ContentDrift
- func DetectContentDrift(db *index.DB, vaultPath string) ([]ContentDrift, error)
type DoctorAllResult
type DoctorEmbeddings
type DoctorIssues
type DoctorMeshIdentity
- func BuildMeshIdentity(ctx context.Context, in MeshDoctorInput) (*DoctorMeshIdentity, error)
- func (m *DoctorMeshIdentity) HasSignal() bool
type DoctorModelBreakdown
type DoctorResult
- func Doctor(db *index.DB, vaultPath string, reg *schema.Registry) (*DoctorResult, error)
type DoctorRollup
- func BuildDoctorRollup(vaults []*DoctorResult, failed []FailedVault) DoctorRollup
- func (r DoctorRollup) RawValidationGap() int
type EmbeddingRetriever
- func (r *EmbeddingRetriever) Search(ctx context.Context, query string, limit, offset int, ...) ([]retrieval.ScoredResult, int, error)
type FTSRetriever
- func (r *FTSRetriever) Search(_ context.Context, query string, limit, offset int, ...) ([]retrieval.ScoredResult, int, error)
type FailedVault
type GitStatusResult
- func GitStatus(detector git.RepoStateDetector, vaultPath string) (*GitStatusResult, error)
type HybridRetriever
- func (h *HybridRetriever) Search(ctx context.Context, query string, limit, offset int, ...) ([]retrieval.ScoredResult, int, error)
type InResult
- func CollectIn(db *index.DB, noteID, edgeType string) (InResult, error)
type IncompatibleLink
type LinksConfig
type MeasuredCalibration
- func MeasureNoiseFloor(ctx context.Context, embedder embedding.Embedder, db *index.DB) (*MeasuredCalibration, error)
type MeshDaemonClient
type MeshDoctorInput
type MeshSigner
type MgetResult
- func Mget(db *index.DB, ids []string, frontmatterOnly bool) (*MgetResult, error)
type NeighborEdge
type NeighborNode
type NeighborsResult
- func Neighbors(resolver *graph.Resolver, input string, depth int, minConfidence string, ...) (*NeighborsResult, error)
type NoteGetConfig
type OutResult
- func CollectOut(db *index.DB, noteID, edgeType string) (OutResult, error)
type SearchConfig
type SearchResult
- func RunSearch(retriever retrieval.Retriever, cfg SearchConfig, w io.Writer) (*SearchResult, error)
type SelfConfig
- func SelfDefaults(cfg SelfConfig) SelfConfig
type SparseEmbedFunc
type SparseRetriever
- func (r *SparseRetriever) Search(ctx context.Context, query string, limit, offset int, ...) ([]retrieval.ScoredResult, int, error)
type StatusIssuesSummary
- func SummarizeValidationIssues(db *index.DB, reg *schema.Registry) (StatusIssuesSummary, error)
type StatusResult
- func VaultStatus(db *index.DB, vaultPath string, cfg *vault.Config, reg *schema.Registry) (*StatusResult, error)
type StatusTypeInfo
type UnresolvedLink
type ValidateIssue
type ValidateResult
- func Validate(db *index.DB, reg *schema.Registry) (*ValidateResult, error)
- func ValidateLive(vaultPath string, reg *schema.Registry) (*ValidateResult, error)

Constants ¶

View Source

// Confidence tier labels. Every value is taken directly from the noisefloor
// package, so this package defines no label literals of its own.
const (
	ConfidenceStrong   = noisefloor.ConfidenceStrong
	ConfidenceModerate = noisefloor.ConfidenceModerate
	ConfidenceWeak     = noisefloor.ConfidenceWeak
	// ConfidenceNoMatch — in noise-floor mode, the top hit is at/below the
	// embedder's noise floor (nothing relevant). In the RRF-gap fallback it
	// means the top results are essentially tied (no clear winner). Both read
	// as "don't commit to top-1".
	ConfidenceNoMatch = noisefloor.ConfidenceNoMatch
)

Confidence tier strings. The noisefloor package is the single source of truth (Principle 7); these are aliases so the rest of the query package can keep referring to query.ConfidenceX without a second literal set that could silently drift from the labels Ask now writes via noisefloor.Relevance.

View Source

// Mesh status verdicts. Each constant is a distinct machine-readable string;
// per the comments below, StatusMeshAuthenticated is the only green state.
const (
	// StatusMeshAuthenticated: pinned root verified the registry AND the running
	// signer proved possession of the resolved binding key. The ONLY green state.
	StatusMeshAuthenticated = "authenticated"
	// StatusMeshSelfConsistentUnpinned: no pin; the daemon-advertised root
	// verified its own registry for self-consistency ONLY — NOT authenticated.
	StatusMeshSelfConsistentUnpinned = "self-consistent-unpinned"
	// StatusMeshNotEnrolled: pinned + registry verified, but the member slug has
	// no live binding (enroll-add pending).
	StatusMeshNotEnrolled = "not-enrolled"
	// StatusMeshKeyMismatch: pinned + binding resolved, but the running signer
	// does NOT hold the binding's key (wrong signer/key).
	StatusMeshKeyMismatch = "key-mismatch"
	// StatusMeshUnverifiable: pinned, but the registry failed to verify
	// (bad-sig/stale/rollback) or could not be fetched.
	StatusMeshUnverifiable = "unverifiable"
	// StatusMeshNoSignal: no mesh signal present (caller should not surface the
	// section at all — see HasSignal).
	StatusMeshNoSignal = "no-signal"
)

Tier-2 status values (SSOT). Each is a distinct, machine-readable verdict.

View Source

// Daemon-mode labels describing what the daemon serves. Note that
// DaemonModeUnknown is the empty string, i.e. the zero value of a string field.
const (
	// DaemonModePlaintext: the daemon serves no well-known root (404).
	DaemonModePlaintext = "plaintext"
	// DaemonModeAdvisoryConfigured: a well-known root is served (registry
	// configured). Advisory only — verdict-gating is a return-true no-op today.
	DaemonModeAdvisoryConfigured = "advisory-configured"
	// DaemonModeUnknown: the daemon was unreachable, so its mode is unknown.
	DaemonModeUnknown = ""
)

Tier-3 daemon-mode values (SSOT). FACTUAL served-state labels only — they do NOT imply message-signature protection (enforcement is a no-op today).

View Source

// Human-readable mesh warning messages, one constant per condition. They are
// declared here as named constants so emitters can share identical text.
const (
	WarnMeshKeyMode        = "identity key file is not 0600 (custody mode is wrong)"
	WarnMeshKeySize        = "identity key file is not the expected ed25519 private-key size"
	WarnMeshUnpinned       = "registry self-consistent (daemon-advertised root), NOT authenticated — enroll persists a pin, or pass --mesh-root-pubkey"
	WarnMeshNotEnrolled    = "your slug is not in the network registry yet (enroll-add pending?)"
	WarnMeshKeyMismatch    = "your binding exists but the running signer does not hold its key (wrong signer/key?)"
	WarnMeshUnverifiable   = "the network registry did not verify against your pinned root (bad signature, stale, or rolled back)"
	WarnMeshNoRegistry     = "no registry available to verify (daemon unreachable and no --mesh-registry given)"
	WarnMeshEnforcementOff = "message-signature enforcement is NOT YET active (advisory mode is a no-op today)"
	WarnMeshHeartbeatStale = "wake-watcher heartbeat is stale — the watcher may be present-but-dead"
)

Warning strings (SSOT) — each drives both the human Warnings slice and an envelope.AddWarning in the cmd layer.

View Source

// Validation rule identifiers. Reference these constants rather than
// repeating the string literals.
const (
	RuleBrokenReference = "broken_reference"
	RuleMissingRequired = "missing_required_field"
	RuleUnknownType     = "unknown_type"
	RuleInvalidStatus   = "invalid_status"
)

Validate rule identifiers — the SSOT for rule strings used across the query layer. validate.go is the canonical definition site; doctor.go and any other caller must reference these constants instead of inlining the strings.

View Source

// DefaultRRFK is the default Reciprocal Rank Fusion smoothing constant (the K
// in 1/(K+rank)).
// NOTE(review): presumably substituted when HybridRetriever.K is left at zero —
// confirm at the use site.
const DefaultRRFK = 60

DefaultRRFK is the Reciprocal Rank Fusion smoothing constant from the original RRF paper. Applied when HybridRetriever.K is zero-value. Tuning this value shifts every hybrid retrieval's ranking; it lives here so there's one home to reason about.

Variables ¶

This section is empty.

Functions ¶

func BuildAutoRetriever ¶

func BuildAutoRetriever(db *index.DB) (retrieval.Retriever, func(), error)

BuildAutoRetriever returns a hybrid retriever if embeddings exist, otherwise keyword. Embedder initialization failure falls back to keyword silently.

func BuildRetriever ¶

func BuildRetriever(mode string, db *index.DB) (retrieval.Retriever, func(), error)

BuildRetriever creates the appropriate retriever for the given search mode. Returns a cleanup function that must be deferred if non-nil.

func CollectTypeBreakdown ¶ added in v0.1.11

func CollectTypeBreakdown(db *index.DB, cfg *vault.Config) (map[string]StatusTypeInfo, error)

CollectTypeBreakdown returns the per-type note counts together with each type's required fields and valid statuses, drawn from the vault config's type registry. It is the single source of truth for the per-type breakdown surfaced by both `vault status` (cold-start) and `doctor` (health hub). A nil cfg yields an empty (non-nil) map so callers can range safely.

func CosineSimilarity ¶

func CosineSimilarity(a, b []float32) float64

CosineSimilarity computes the cosine similarity between two float32 vectors. Returns 0 if vectors have different lengths, zero length, or zero magnitude.

func FormatAsk ¶

func FormatAsk(result *AskResult, w io.Writer) error

FormatAsk writes a human-readable text representation of an AskResult.

func FormatAskExplain ¶

func FormatAskExplain(result *AskResult, w io.Writer) error

FormatAskExplain is like FormatAsk but prints per-hit lane breakdowns (which sub-retrievers scored the note, each lane's raw 1/(K+rank), and how many lanes went into the mean). Lets you see the fusion math on the command line instead of piping --json through jq/python — closes the diagnostic gap that had operators rebuilding ad-hoc tooling for every ranking investigation.

func FormatAskPointersOnly ¶

func FormatAskPointersOnly(result *AskResult, w io.Writer) error

FormatAskPointersOnly is like FormatAsk but skips body content for both the target note and every context-pack item — output is title + id + type only. Used by the SessionStart hook so the body of "what matters most right now" is never preloaded; the agent has to query for it.

This converts the dogfood rule (use vaultmind ask before answering) from honor-system discipline (manifesto principle 9: discipline does not survive time pressure) to design: every body-read becomes an explicit, logged activation event the agent had to choose, rather than something the preload silently satisfied. Closes the trap documented in the plasticity-gap arc and the 2026-04-25 design signal under step 3 of the plasticity roadmap.

Retrieval is unchanged — search hits, context-pack assembly, and scoring all happen normally. Only the rendering omits bodies. The hint at the bottom names the next move (an explicit ask) so the agent knows the loop closes by querying, not by waiting for more context.

func FormatAskPreview ¶

func FormatAskPreview(result *AskResult, w io.Writer) error

FormatAskPreview renders each ranked hit with a one-line snippet from the note body underneath the title — bridging the gap between pointers-only (titles, no body context) and full-body Ask (3000+ tokens of context pack). Closes the AX gap named in the felt-experience inventory: with pointers-only I see ids and titles but often can't tell what a note actually is until I query its body. The snippet was already being populated by every retriever; this just renders it.

func FormatAskRead ¶

func FormatAskRead(result *AskResult, note *index.FullNote, w io.Writer) error

FormatAskRead renders the ask menu (search header + ranked hits) plus the body of one specific note inline — the single-command shortcut for the probe→read workflow when the agent already knows which hit from the menu they want. Backs `vaultmind ask --read N` and `vaultmind ask --read <id>`. The note argument is the resolved chosen hit's full body; the caller fetches it (cmd/ask.go) so this renderer stays in the format layer without taking a DB dependency.

FormatAskRead always renders without explain. To get per-lane RRF math under each hit when --read and --explain are combined, use FormatAskReadWithOptions.

func FormatAskReadWithOptions ¶

func FormatAskReadWithOptions(result *AskResult, note *index.FullNote, w io.Writer, explain bool) error

FormatAskReadWithOptions is the explain-aware form of FormatAskRead. When explain is true, each ranked hit in the menu shows its per-lane RRF contribution underneath — matching what `vaultmind ask --explain` renders in default mode. Round-3 inter-agent review caught that `--read N --explain` was silently dropping --explain because runAskRead short-circuited before the explain path was read; this is the rendering side of the fix.

func FormatGitStatus ¶

func FormatGitStatus(result *GitStatusResult, w io.Writer) error

FormatGitStatus writes a human-readable summary to w.

func FuzzyTitleMatches ¶

func FuzzyTitleMatches(query string, titles []index.NoteTitle, n int) []index.NoteTitle

FuzzyTitleMatches returns up to n titles whose words overlap with the query's words, ordered by overlap count desc (ties broken by shorter title first — more specific titles win). Titles with zero token overlap are excluded — a silent return beats a misleading suggestion.

Intended as a zero-hit fallback on ask: when retrieval returns nothing, suggest the nearest titles as a user nudge, not as retrieval results.

func MeshSurfacedCounts ¶ added in v0.2.0

func MeshSurfacedCounts(m *DoctorMeshIdentity) (errs, warns int)

MeshSurfacedCounts returns the error/warning counts the mesh section contributes to the doctor rollup. Per M4 + doctor convention, mesh issues are WARNINGS (exit stays 0): every section Warnings entry counts as one warning.

func NoteSimilarities ¶

func NoteSimilarities(ctx context.Context, queryText string, embedder embedding.Embedder, db *index.DB) (map[string]float64, error)

NoteSimilarities embeds the query and computes cosine similarity against all stored note embeddings. Returns noteID -> similarity (0.0-1.0). Returns nil map if embedder is nil (keyword-only mode).

func ResultSurfacedIssueCounts ¶ added in v0.2.0

func ResultSurfacedIssueCounts(result *DoctorResult) (errs, warns int)

ResultSurfacedIssueCounts is the result-level SSOT rollup: it folds the vault-issue counts (SurfacedIssueCounts over result.Issues) together with the mesh-identity section's contribution (MeshSurfacedCounts). Mesh issues are WARNINGS only (doctor keeps exit-0-on-warning; no new exit code), so a not-authenticated mesh state bumps the warning rollup without ever turning it into an error. The cmd-layer rollup line uses this so the text "Issues: E errors, W warnings" reflects the mesh section too.

func RunLinks ¶

func RunLinks(db *index.DB, cfg LinksConfig, w io.Writer) error

RunLinks executes a single-direction ("out" or "in") links query and renders it to w. The `--both` path is handled by the cmd layer via CollectBoth so it can emit ONE envelope rather than two concatenated ones.

func RunNoteGet ¶

func RunNoteGet(db *index.DB, cfg NoteGetConfig, w io.Writer) error

RunNoteGet executes the note get logic.

func RunResolve ¶

func RunResolve(db *index.DB, input, vaultPath string, jsonOut bool, w io.Writer) error

RunResolve executes the resolve command logic.

func RunSelf ¶

func RunSelf(db *index.DB, cfg SelfConfig, w io.Writer) error

RunSelf renders the agent's memory state — recent / hot / stale. Reads only existing schema columns (access_count, last_accessed_at, title, type). Three sections:

Recent: notes touched most recently, regardless of count.
Hot: notes ranked by approximate ACT-R activation (ln(1+count) - d*ln(elapsed_hours)), capturing both frequency and recency in one number.
Stale: accessed notes whose last_accessed_at is older than the stale threshold, sorted by activation desc.

Empty vault prints "no accesses recorded yet" so the caller can tell blank-slate from rendering failure.

func SurfacedIssueCounts ¶ added in v0.2.0

func SurfacedIssueCounts(issues DoctorIssues) (errors, warnings int)

SurfacedIssueCounts returns the error and warning counts for the issues a doctor run actually SURFACES — the items in DoctorIssues that the human report renders as detail lines and that the --json envelope represents. It is the single source of truth for the "Issues: E errors, W warnings" text rollup so that count can never again disagree with --json.

It deliberately does NOT fold in DoctorResult.ValidationSummary (the schema VALIDATION aggregate from SummarizeValidationIssues). That aggregate counts findings — unknown_type, invalid_status — that doctor surfaces only in the nested result.validation_summary JSON field, never as a per-item line in the text report. (missing_required_field and broken_reference ARE surfaced — via result.issues and, for broken_reference, a dedicated text line.) Counting it in the text rollup overstated warnings (e.g. "0 errors, 96 warnings") against a --json envelope that surfaced none. result.validation_summary stays in --json unchanged; this helper only governs the surfaced-set rollup.

Severity follows the doctor renderer's own framing: integrity violations that mean data is wrong are errors; advisories that mean the vault is degraded-but-usable are warnings.

func Truncate ¶

func Truncate(s string, maxLen int) string

Truncate shortens a string to maxLen runes, appending "..." if truncated.

func WriteKeywordOnlyHint ¶

func WriteKeywordOnlyHint(w io.Writer, retrievalMode string, hitCount int) bool

WriteKeywordOnlyHint writes a user-facing diagnostic to w when the ask retrieval ran in keyword-only mode AND returned zero hits. That combination signals a vault without embeddings — paraphrase queries cannot match and the user has no other feedback explaining the silence. Reports whether the hint was written.

Silent on hybrid mode (different problem — real zero-hit) and silent when keyword search actually found results (user got what they asked for).

func WriteTitleSuggestions ¶

func WriteTitleSuggestions(w io.Writer, matches []index.NoteTitle) bool

WriteTitleSuggestions renders a human-readable "did you mean?" block for the given matches. Silent (returns false, writes nothing) on empty input so callers can compose with other zero-hit diagnostics without adding blank sections.

Types ¶

type ActivationReranker ¶

// ActivationReranker pairs a base retriever with an activation scoring
// function and two blend weights.
// NOTE(review): the handling of zero-value Alpha, Beta and FetchN is not
// visible in this declaration — confirm against Search.
type ActivationReranker struct {
	Base   retrieval.Retriever
	Score  ActivationScoreFn
	Alpha  float64 // weight on RRF score, normalized to [0, 1]
	Beta   float64 // weight on activation score, normalized to [0, 1]
	FetchN int     // candidates to fetch from base; rerank operates on these
}

ActivationReranker is slice 5b'' — the post-RRF rerank that replaces slice 5b's 5th-lane approach. It wraps a base retriever (typically the 4-way HybridRetriever), takes the top-N candidates, and reorders them by blending RRF score with activation score.

The structural property: activation operates only on candidates that already cleared the base retriever. It cannot introduce notes from outside the candidate set, so the activation drown-out that broke slice 5b' (mean-of-present treating activation-only single-lane as equivalent to multi-query-lane) is impossible by construction.

See the activation-rerank design decision for the full probe sequence and the candidate-fix analysis that produced this design.

Algorithm:

base.Search(query, FetchN) → N candidates with RRF scores
For each candidate c:
    rrf_norm[c]        = c.Score / max_rrf
    activation_raw[c]  = Score(c.ID)
    activation_norm[c] = activation_raw[c] / max_activation
    final[c]           = Alpha * rrf_norm[c] + Beta * activation_norm[c]
Sort candidates by final desc.
Return top-K (= the limit passed to Search).

Zero-value Alpha/Beta/FetchN fall back to defaults that preserve the base retriever's behavior — a caller constructing the reranker with no explicit knobs gets safe behavior (rrf-only effective when β=0).

func (*ActivationReranker) Search ¶

func (r *ActivationReranker) Search(ctx context.Context, query string, limit, offset int, filters index.SearchFilters) ([]retrieval.ScoredResult, int, error)

Search fetches FetchN candidates from the base retriever, blends RRF score with activation score using Alpha/Beta weights, sorts by the combined score, and returns the top `limit` (with `offset` applied after sorting).

Implements retrieval.Retriever so the reranker can slot into any pipeline that expects a Retriever — including being further wrapped by future cross-vault federation rerankers.

type ActivationRetriever ¶

// ActivationRetriever bundles the index DB, the experiment DB, the activation
// parameters and an optional clock (Now).
// NOTE(review): how Search uses these fields is not visible in this
// declaration — confirm there.
type ActivationRetriever struct {
	DB     *index.DB
	ExpDB  *experiment.DB
	Params experiment.ActivationParams
	// Now is the reference time for activation decay. Nil means time.Now().
	// Tests set a fixed instant so ranking is deterministic rather than
	// depending on sub-millisecond timing between access events and scoring.
	Now func() time.Time
}

ActivationRetriever ranks notes by their ACT-R activation score (base-level activation + decay). It implements retrieval.Retriever and is intended to be a 5th lane in HybridRetriever's RRF combine — the first principled wiring of the reinforcement signal that internal/experiment has been collecting since 2026-04-29.

The retriever is query-independent: every Search call returns the same ranking regardless of `query`, because activation is a function of access history, not text relevance. The mean-of-present RRF fusion in HybridRetriever then only boosts notes that ALSO appear in at least one query-dependent lane — recently-accessed notes that happen to match the query rise; recently-accessed notes that don't stay where the query ranks them.

Notes with access_count = 0 are not returned; the lane's coverage matches the substrate's coverage. Filters (type, tag) are honored because the agent's mental model of "activation" is per-type (asking about concepts shouldn't surface recently-touched sources).

func (*ActivationRetriever) Search ¶

func (r *ActivationRetriever) Search(_ context.Context, _ string, limit, offset int, filters index.SearchFilters) ([]retrieval.ScoredResult, int, error)

Search returns up to `limit` notes ranked by activation score, descending. Score is normalized to [0, 1] so it composes cleanly with other retrievers; the absolute scale is irrelevant under RRF (only ranks matter), but normalization keeps Components reporting consistent with the other lanes.

Implementation:

Pull all accessed-note IDs from the index (access_count > 0).
Hydrate metadata (title, type, path, is_domain) in one batch query.
Apply type/tag filters before scoring (cheaper than scoring then dropping).
Call experiment.ComputeBatchScores — which uses the experiment DB's note_access events for the full access-time history that ACT-R math needs.
Sort by score, normalize, return.

type ActivationScoreFn ¶

// ActivationScoreFn takes a batch of note ids and returns a map from id to
// activation score, or an error.
// NOTE(review): presumably an id missing from the map is treated the same as
// a score of 0 — confirm against the reranker.
type ActivationScoreFn func(ids []string) (map[string]float64, error)

ActivationScoreFn returns the activation score for each requested note id. Notes with no access events return 0 (or are absent from the map — the reranker treats both the same). Decoupling the scoring function from experiment.ComputeBatchScores at the type boundary keeps the reranker testable without spinning up an experiment DB and lets future scorers (e.g. activation-with-similarity, decay-only, count-only) compose without changing the reranker.

type AskConfig ¶

// AskConfig carries the query text, budget and limit settings, activation
// inputs, noise-floor calibration, and the SuppressOnNoMatch switch. The
// optional fields are documented individually below.
type AskConfig struct {
	Query            string
	Budget           int
	MaxItems         int
	SearchLimit      int
	ActivationScores map[string]float64
	// Embedder is optional. When non-nil, raw cosine similarities are computed
	// and used for spreading activation scoring via ActivationFunc.
	Embedder embedding.Embedder
	// ActivationFunc optionally recomputes activation scores after similarities
	// are known. When provided and similarities are available, the returned
	// scores replace ActivationScores for context-pack sorting. This enables
	// spreading activation without coupling query to the experiment package.
	ActivationFunc func(similarities map[string]float64) map[string]float64
	// NoiseFloor is the vault's noise-floor N (the cosine an off-topic query gets
	// to any note). When HasNoiseFloor is true and cosine similarities are
	// available, the top-hit confidence is derived honestly from N and the
	// dispersion below (see internal/noisefloor) instead of the legacy RRF gap.
	NoiseFloor float64
	// NoiseFloorSigma is the vault's note-to-note cosine dispersion σ — the scale
	// for z = (top_cosine − N)/σ. The caller passes a measured, clamped σ; when
	// zero/unset, Ask falls back to the embedder's DefaultDispersion.
	NoiseFloorSigma float64
	HasNoiseFloor   bool
	// VaultLowContrast marks a tight vault (high note-to-note μ) where even
	// correct top hits read "weak". Surfaced on the result so the formatter can
	// explain a weak label rather than let an agent misread it as "irrelevant".
	VaultLowContrast bool
	// SuppressOnNoMatch is the recall floor: when the top hit lands at/below the
	// noise floor (no_match), Ask returns before the context pack and the access
	// fan-out — so an ambient recall on an off-domain prompt neither injects
	// noise nor reinforces the irrelevant note it happened to surface. Opt-in
	// (the recall hook sets it); interactive ask leaves it false.
	SuppressOnNoMatch bool
}

AskConfig holds parameters for the Ask compound operation.

type AskResult ¶

// AskResult carries the query, the ranked hits, an optional context pack, and
// the confidence and noise-floor fields. Similarities is tagged `json:"-"` and
// is therefore never serialized.
type AskResult struct {
	Query            string                    `json:"query"`
	TopHits          []retrieval.ScoredResult  `json:"top_hits"`
	Context          *memory.ContextPackResult `json:"context,omitempty"`
	RetrievalMode    string                    `json:"retrieval_mode,omitempty"`
	TopHitConfidence string                    `json:"top_hit_confidence,omitempty"`
	Similarities     map[string]float64        `json:"-"` // raw cosine similarities (not serialized)
	// Noise-floor relevance (populated when HasNoiseFloor + similarities exist).
	// TopHitCosine is the raw cosine of the query against the top hit; NoiseFloor
	// is N; NoiseFloorSigma is σ; RelevanceZ = (TopHitCosine − N)/σ is the label's
	// derivation; RelevanceR = TopHitCosine − N is the raw cosine margin, kept for
	// agents that want the un-normalized number. All surfaced so an agent sees the
	// real values, not just the tier word.
	// NoiseFloorApplied is true when TopHitConfidence was derived from the
	// noise-floor relevance (not the RRF-gap fallback). The formatter uses it to
	// print honest "nothing relevant" / "z above noise floor" labels, and JSON
	// consumers use it to know the floats below are meaningful (rather than
	// zero-because-unset). Because it disambiguates, the floats are NOT omitempty
	// — a MiniLM floor of 0.0 or an exact-boundary z of 0.0 must still serialize
	// rather than silently vanish.
	// NOTE(review): TopHitCosine below DOES carry omitempty, so a cosine of
	// exactly 0.0 is dropped from the JSON even when NoiseFloorApplied is true.
	// Confirm whether that exception is intentional or the tag should be removed.
	NoiseFloorApplied bool    `json:"noise_floor_applied,omitempty"`
	TopHitCosine      float64 `json:"top_hit_cosine,omitempty"`
	NoiseFloor        float64 `json:"noise_floor"`
	NoiseFloorSigma   float64 `json:"noise_floor_sigma"`
	RelevanceR        float64 `json:"relevance_r"`
	RelevanceZ        float64 `json:"relevance_z"`
	// LowContrastVault is true when the vault is tight (high note-to-note μ), so
	// a "weak" top hit often means the best available correct match, not
	// "irrelevant". The formatter renders a one-line hint when it's set.
	LowContrastVault bool `json:"low_contrast_vault,omitempty"`
}

AskResult is the combined output of a search + context-pack operation. RetrievalMode reports which retriever lane actually ran ("hybrid" when embeddings exist and were used, "keyword" when ask fell back to FTS-only). Set by the caller after Ask returns, since Ask itself does not know which retriever it was handed — the caller (cmd/ask.go) has that signal. Surfacing it in the JSON envelope lets agent consumers detect keyword-only fallback programmatically (the human-readable hint prints on stdout and isn't available via --json).

TopHitConfidence is one of "strong" | "moderate" | "weak" | "no_match" | "". Its PRIMARY derivation is the band-normalized relevance RelevanceZ = (TopHitCosine − NoiseFloor)/NoiseFloorSigma (see internal/noisefloor and NoiseFloorApplied below): z measures how many vault-σ the top hit clears the off-topic noise floor by — which is the question the agent is actually asking, expressed on a scale that transfers across vault tightness. When no noise floor is available (HasNoiseFloor false), it FALLS BACK to the RRF-gap heuristic (computeTopHitConfidenceRRFGap), which measures candidate separation, not relevance, and so never yields "no_match". Read NoiseFloorApplied to tell the two apart.

func Ask ¶

func Ask(ctx context.Context, retriever retrieval.Retriever, resolver *graph.Resolver, db *index.DB, cfg AskConfig) (*AskResult, error)

Ask searches the vault for the query, computes raw cosine similarities (when an embedder is available), recomputes activation scores with spreading activation (via ActivationFunc), then packs token-budgeted context around the top hit. Context-pack failure is non-fatal.

func AskHits ¶

func AskHits(ctx context.Context, retriever retrieval.Retriever, query string, searchLimit int) (*AskResult, error)

AskHits is the search-only equivalent of Ask — runs the retriever and computes top-hit confidence, but skips context-pack assembly, activation re-scoring, and the RecordNoteAccess fan-out that Ask fires on (target + N neighbors).

KNOWN DIVERGENCE (follow-up): AskHits does not compute cosine similarities, so it cannot derive noise-floor confidence — it uses the RRF-gap fallback label. The same query therefore reads as "relevance: <tier> (R=…)" under `ask` but "top-hit confidence: <tier>" under `ask --read N`, with different semantics behind the same tier word. The header phrasing differs so the derivation is at least visible. Unifying them means computing the top hit's cosine here too (an extra embed+score per --read); deferred until that cost is justified.

AskHits is used by callers that want the menu without committing to the top hit, e.g. `vaultmind ask --read 2` reads hit #2's body — so packing context around hit #1 would mis-attribute access events to a note the agent didn't read. The caller is responsible for firing access on whatever it chooses to read.

type AutoRetrieverResult ¶

// AutoRetrieverResult bundles what an auto-retriever builder hands back: the
// retriever itself, the embedder (if any), and a cleanup hook.
type AutoRetrieverResult struct {
	// Retriever is always non-nil.
	Retriever retrieval.Retriever
	Embedder  embedding.Embedder // nil when keyword-only (no embeddings)
	// Cleanup is a no-op in keyword-only mode, so callers never need a nil check.
	Cleanup   func()             // always non-nil; safe to defer unconditionally
}

AutoRetrieverResult holds the retriever, embedder, and cleanup returned by the BuildAutoRetriever family (BuildAutoRetrieverFull, BuildAutoRetrieverWithActivation, BuildAutoRetrieverWithRerank). Retriever is always non-nil. Embedder is nil in keyword-only mode (no embeddings). Cleanup is always safe to call unconditionally (no-op in keyword-only mode).

func BuildAutoRetrieverFull ¶

func BuildAutoRetrieverFull(db *index.DB) AutoRetrieverResult

BuildAutoRetrieverFull is like BuildAutoRetriever but also exposes the embedder for computing raw cosine similarities (spreading activation).

func BuildAutoRetrieverWithActivation ¶

func BuildAutoRetrieverWithActivation(db *index.DB, expDB *experiment.DB) AutoRetrieverResult

BuildAutoRetrieverWithActivation is BuildAutoRetrieverFull plus the activation lane (slice 5b' from the plasticity roadmap). When expDB is non-nil and the underlying retriever is hybrid, an ActivationRetriever is appended as a 5th RRF lane named "activation". Notes with access_count = 0 simply don't appear in this lane, so cold-start vaults degrade cleanly to 4-way RRF.

IMPORTANT calibration obligation: enabling activation shifts the rank-1/rank-2 score gap distribution that step-4's TopHitConfidence thresholds (5% / 2%) were calibrated against. Callers turning this on for the default ask path must re-probe the gap distribution and update the threshold constants in internal/query/format.go, or the strong/moderate/weak labels silently miscalibrate. See the priority-order doc for the full step-4 ↔ step-5 coupling.

func BuildAutoRetrieverWithRerank ¶

func BuildAutoRetrieverWithRerank(db *index.DB, expDB *experiment.DB, alpha, beta float64) AutoRetrieverResult

BuildAutoRetrieverWithRerank returns a 4-way HybridRetriever wrapped in an ActivationReranker (slice 5b''). The reranker takes the top FetchN candidates from the 4-way retriever and reorders them by blending RRF score with activation score using Alpha/Beta weights.

Replaces BuildAutoRetrieverWithActivation (slice 5b' — 5th-lane variant) for the default activation-aware path. The lane variant stays on disk, unwired, as the documented escalation that didn't pan out — see the activation-rerank design decision.

IMPORTANT calibration obligation (carried over from 5b'): enabling this reranker shifts the rank-1/rank-2 score gap distribution that step-4's TopHitConfidence thresholds (5% / 2%) were calibrated against. The blended score's gap distribution differs from the raw RRF gap; callers turning this on for the default ask path must re-probe and update the threshold constants in internal/query/format.go, or the strong/moderate/weak labels silently miscalibrate.

Zero-value alpha/beta fall back to defaults pinned 2026-05-03 from the α/β probe across {0.5/0.5, 0.7/0.3, 0.9/0.1, 0.95/0.05} — see the activation-rerank design decision for the data and rationale. Current defaults: α=0.9 / β=0.1.

type BothResult ¶ added in v0.1.11

// BothResult is the combined payload for a `--both` links query: the outbound
// and inbound directions travel in one structure so the caller can emit a
// single JSON envelope instead of two concatenated ones.
type BothResult struct {
	// Out is the outbound direction. It is declared first, so it is encoded
	// before In.
	Out struct {
		SourceID string          `json:"source_id"`
		Links    []graph.OutLink `json:"links"`
	} `json:"out"`
	// In is the inbound direction. It reuses the "links" key of the standalone
	// inbound payload; the nesting under "in" vs "out" disambiguates direction.
	In struct {
		TargetID string         `json:"target_id"`
		Links    []graph.InLink `json:"links"`
	} `json:"in"`
}

BothResult is the combined payload for a `--both` query: it carries the outbound and inbound directions in ONE structure so the cmd layer can wrap it in a single envelope (out before in) instead of emitting two concatenated envelopes (invalid JSON). Inbound links use the same "links" key as the standalone --in payload; the object nesting (result.in vs result.out) already disambiguates direction.

func CollectBoth ¶ added in v0.1.11

func CollectBoth(db *index.DB, cfg LinksConfig) (BothResult, string, error)

CollectBoth resolves cfg.Input and returns the combined out+in payload WITHOUT rendering. Used by the cmd layer to build a single `--both` envelope.

type ColBERTEmbedFunc ¶

// ColBERTEmbedFunc embeds a query string into per-token ColBERT vectors.
// NOTE(review): presumably one inner []float32 per query token — confirm
// against the embedder implementation.
type ColBERTEmbedFunc func(ctx context.Context, text string) ([][]float32, error)

ColBERTEmbedFunc produces per-token ColBERT vectors for a query string.

type ColBERTRetriever ¶

// ColBERTRetriever ranks notes by MaxSim between the query's per-token vectors
// and the stored ColBERT token matrices.
type ColBERTRetriever struct {
	// DB is the index database handle.
	// NOTE(review): presumably the source of the stored ColBERT matrices — confirm.
	DB           *index.DB
	// EmbedColBERT turns the query text into per-token vectors.
	EmbedColBERT ColBERTEmbedFunc
	// NOTE(review): Dims is presumably the per-token vector dimensionality;
	// its exact use is not visible here — confirm.
	Dims         int
}

ColBERTRetriever searches by MaxSim scoring between query and stored ColBERT token matrices.

func (*ColBERTRetriever) Search ¶

func (r *ColBERTRetriever) Search(ctx context.Context, query string, limit, offset int, filters index.SearchFilters) ([]retrieval.ScoredResult, int, error)

Search embeds the query as per-token ColBERT vectors, computes MaxSim scoring against all stored ColBERT embeddings, and returns the top results sorted by score descending.

type ContentDrift ¶

// ContentDrift describes one note whose current file content hash differs from
// the hash the indexer stored for it.
type ContentDrift struct {
	// NoteID identifies the drifted note.
	NoteID      string `json:"note_id"`
	// Path is the note's file path.
	// NOTE(review): whether it is vault-relative or absolute is not shown here — confirm.
	Path        string `json:"path"`
	CurrentHash string `json:"current_hash"` // sha256(file content)
	StoredHash  string `json:"stored_hash"`  // notes.hash from index DB
}

ContentDrift describes one note whose current file content hash differs from the indexer's stored hash.

func DetectContentDrift ¶

func DetectContentDrift(db *index.DB, vaultPath string) ([]ContentDrift, error)

DetectContentDrift compares each domain note's current file content hash against the indexer's stored hash and returns the notes whose content has changed since the last `vaultmind index` pass.

Hash-based, NOT mtime-based: git checkouts, branch switches, and other VCS operations bump file mtime without touching content. The prior mtime-based detector produced ~95% false positives on real vaults (385 of 407 notes after a routine `git checkout main`). sha256 over the full file gives precise content identity — only real content edits trigger drift.

Per-note IO failures (deleted files, unreadable permissions) are silently skipped. The indexer reports those via its own path; doctor's job is health summary, not filesystem-error reporting. ORDER BY path gives deterministic output (the experiment framework consumes this JSON; stable order avoids spurious diffs).

type DoctorAllResult ¶ added in v0.1.11

// DoctorAllResult is the single-envelope payload for `doctor --all`: one
// workspace-level rollup plus the full per-vault results.
type DoctorAllResult struct {
	// Rollup is the workspace-level summary.
	Rollup DoctorRollup    `json:"rollup"`
	// Vaults holds one full DoctorResult per diagnosed vault; each entry
	// carries its own vault_path.
	Vaults []*DoctorResult `json:"vaults"`
	// Failed lists every discovered vault that could not be opened or
	// diagnosed, by path and reason. omitempty: a clean workspace emits no
	// `failed` key.
	Failed []FailedVault   `json:"failed,omitempty"`
}

DoctorAllResult is the combined, single-envelope payload for `doctor --all`. It carries a workspace-level Rollup plus the per-vault DoctorResults. Each entry in Vaults is a full DoctorResult (which already carries its own vault_path), so consumers get one machine-readable value for the whole workspace — NOT one envelope per vault.

Failed carries every discovered vault that could not be opened or diagnosed: rather than silently dropping a corrupt vault (which would hide it from the operator), we surface it here by path and reason. The field is omitempty so a clean workspace emits no `failed` key. Reporting failures in this combined envelope — instead of a per-vault error envelope — preserves the single-envelope contract.

type DoctorEmbeddings ¶

// DoctorEmbeddings reports the vault's semantic-retrieval readiness by showing
// which embedding lanes are populated.
type DoctorEmbeddings struct {
	TotalNotes           int    `json:"total_notes"`
	// DenseCount, SparseCount and ColBERTCount report how populated each
	// embedding lane is.
	// NOTE(review): presumably the number of notes with that embedding — confirm.
	DenseCount           int    `json:"dense_count"`
	SparseCount          int    `json:"sparse_count"`
	ColBERTCount         int    `json:"colbert_count"`
	Model                string `json:"model"` // "bge-m3", "minilm", "mixed", or "" when no dense embeddings
	// NOTE(review): the exact readiness criterion for SemanticReady is not
	// visible here — confirm against the doctor implementation.
	SemanticReady        bool   `json:"semantic_ready"`
	// HasModalityImbalance flags a BGE-M3 vault that has dense embeddings but
	// where some notes are missing sparse or colbert. Dense-only (MiniLM)
	// vaults are never flagged.
	HasModalityImbalance bool   `json:"has_modality_imbalance"`
	// MixedModel is non-nil when the vault has notes embedded with more than
	// one model (e.g. mid-upgrade from MiniLM to BGE-M3). Each entry pairs a
	// model name with its row count. When set, Model == "mixed". Surfacing
	// this explicitly prevents the silent-failure shape where doctor reports
	// "bge-m3" while half the rows are still MiniLM. See vaultmind#22.
	MixedModel []DoctorModelBreakdown `json:"mixed_model,omitempty"`
}

DoctorEmbeddings reports the vault's semantic-retrieval readiness. Surfaces which embedding lanes are populated so a user can diagnose a keyword-only fallback at a glance without running an ask query and hitting zero hits.

HasModalityImbalance flags the failure mode where a BGE-M3 vault has dense embeddings but some notes are missing sparse or colbert. Under hybrid RRF this silently compresses ranking: a partially-covered note at rank 1 in 2 lanes loses to a ubiquitous rank-3 note across 4 lanes. Dense-only vaults (MiniLM) are never flagged — sparse/colbert don't apply to that model.

type DoctorIssues ¶

// DoctorIssues holds counts of vault health issues, with optional per-item
// detail slices that are omitted from JSON when empty.
type DoctorIssues struct {
	DuplicateIDs              int                `json:"duplicate_ids"`
	BrokenReferences          int                `json:"broken_references"`
	// MissingRequiredFields is populated only when Doctor is given a non-nil
	// schema registry (it sums the validator's missing-field issues); with a
	// nil registry it stays 0.
	MissingRequiredFields     int                `json:"missing_required_fields"`
	MalformedMarkers          int                `json:"malformed_markers"`
	UnresolvedLinks           int                `json:"unresolved_links"`
	NotesMissingIDOrType      int                `json:"notes_missing_id_or_type"`
	UnresolvedLinkDetails     []UnresolvedLink   `json:"unresolved_link_details,omitempty"`
	// ObsidianIncompatibleLinks counts wikilinks that resolve in VaultMind but
	// not in Obsidian; IncompatibleLinkDetails carries the per-link entries.
	ObsidianIncompatibleLinks int                `json:"obsidian_incompatible_links"`
	IncompatibleLinkDetails   []IncompatibleLink `json:"incompatible_link_details,omitempty"`
	// NOTE(review): PathPseudoIDLinks presumably counts links whose target is a
	// path used in place of a note ID — confirm against the detector.
	PathPseudoIDLinks         int                `json:"path_pseudo_id_links"`
	PathPseudoIDDetails       []UnresolvedLink   `json:"path_pseudo_id_details,omitempty"`

	// StaleIndex counts domain notes whose current file content hash
	// differs from the indexer's stored hash — i.e. notes edited AFTER
	// the last `vaultmind index` pass. The operator-visible signal that
	// downstream artifacts (index, embeddings, marker sections) are out
	// of sync with the source. See DetectContentDrift for the detection
	// contract.
	//
	// Replaced an earlier mtime-based detector that produced ~95% false
	// positives on real vaults — git checkouts/branch switches/pulls
	// bump mtime without touching content, and the prior signal could
	// not distinguish "edited" from "VCS-touched". Hash comparison is
	// precise: only actual content edits trigger drift.
	StaleIndex        int            `json:"stale_index"`
	StaleIndexDetails []ContentDrift `json:"stale_index_details,omitempty"`

	// HookDrift counts Claude Code hook scripts in the project's
	// `.claude/scripts/` whose bytes differ from the embedded canonical
	// in `internal/hookscripts/`. Surfaces "the foundation has rotted"
	// — copies were edited, or the binary was upgraded but old copies
	// linger. Resolution: `vaultmind hooks install --force <project>`.
	// Populated by cmd/doctor.go (project dir comes from there); query
	// layer keeps the type but doesn't import internal/hooks (business
	// layer isolation per ADR-009).
	HookDrift        int      `json:"hook_drift"`
	HookDriftDetails []string `json:"hook_drift_details,omitempty"`

	// LegacyHooksJSON is true when `.claude/hooks.json` exists at the
	// project root. That standalone file is no longer recognized by
	// Claude Code 2.1.129+ — projects with it have silently broken
	// hooks. The fix is to migrate the contents into
	// `.claude/settings.json` under a top-level `hooks` key.
	// Live evidence from companion project dogfood 2026-05-06/07.
	// Populated by cmd/doctor.go (project dir comes from there); query
	// layer keeps the type but doesn't import internal/hooks per
	// ADR-009 business-business isolation.
	LegacyHooksJSON bool `json:"legacy_hooks_json"`
}

DoctorIssues holds counts of vault health issues.

type DoctorMeshIdentity ¶ added in v0.2.0

// DoctorMeshIdentity is the JSON-serializable mesh-health section of the
// doctor report, organised into three tiers: local key custody, binding
// authenticity in the live mesh, and chat reachability.
type DoctorMeshIdentity struct {
	// Tier 1 — identity custody (local, keyless).
	KeyPresent      bool `json:"key_present"`
	KeyModeOK       bool `json:"key_mode_ok"`
	KeySizeOK       bool `json:"key_size_ok"`
	SignerReachable bool `json:"signer_reachable"` // INFO — signer is on-demand

	// Tier 2 — binding resolves in the live mesh (authenticated).
	Pinned          bool   `json:"pinned"`
	// Authenticated is true only on the pinned + resolves + self-verify-pass
	// path; every other state leaves it false.
	Authenticated   bool   `json:"authenticated"` // M4 top-level boolean
	NetworkID       string `json:"network_id,omitempty"`
	BindingResolves bool   `json:"binding_resolves"`
	HoldsBindingKey bool   `json:"holds_binding_key"` // selfVerify proof-of-possession
	// NOTE(review): the set of Status values is not visible here — confirm
	// against BuildMeshIdentity.
	Status          string `json:"status"`

	// Tier 3 — chat reachability (honest labels).
	DaemonReachable       bool   `json:"daemon_reachable"`
	DaemonMode            string `json:"daemon_mode,omitempty"`
	EnforcementActive     bool   `json:"enforcement_active"` // always false today
	WatcherHeartbeatFresh bool   `json:"watcher_heartbeat_fresh"`
	// WatcherHeartbeatAge is expressed in seconds (see the JSON key).
	WatcherHeartbeatAge   int    `json:"watcher_heartbeat_age_secs"`

	// Warnings each also drive an envelope.AddWarning in the cmd layer.
	Warnings []string `json:"warnings,omitempty"`
}

DoctorMeshIdentity is the JSON-serializable mesh-health section. It is a POINTER on DoctorResult (nil ⇒ absent from --json) so the section appears only when a mesh signal exists.

func BuildMeshIdentity ¶ added in v0.2.0

func BuildMeshIdentity(ctx context.Context, in MeshDoctorInput) (*DoctorMeshIdentity, error)

BuildMeshIdentity runs the 3-tier Contract-B identity health check and returns the populated section. It NEVER reads the private key file (tier-1 is Lstat stat-only; the binding-key check is a keyless proof-of-possession via the signer). Authenticated is true ONLY on the pinned + resolves + selfVerify-pass path (M1+M3); every other state leaves Authenticated false (M4).

func (*DoctorMeshIdentity) HasSignal ¶ added in v0.2.0

func (m *DoctorMeshIdentity) HasSignal() bool

HasSignal reports whether any mesh signal exists, so the cmd layer can decide whether to attach the section at all (nil ⇒ absent from --json).

type DoctorModelBreakdown ¶

// DoctorModelBreakdown is one entry in DoctorEmbeddings.MixedModel: a model
// name paired with its row count.
type DoctorModelBreakdown struct {
	Model string `json:"model"` // "bge-m3", "minilm", or "unknown"
	// Count is the number of rows embedded with Model.
	Count int    `json:"count"`
}

DoctorModelBreakdown is one entry in DoctorEmbeddings.MixedModel.

type DoctorResult ¶

// DoctorResult is the JSON-serializable output of the doctor command.
type DoctorResult struct {
	VaultPath         string                    `json:"vault_path"`
	TotalFiles        int                       `json:"total_files"`
	DomainNotes       int                       `json:"domain_notes"`
	UnstructuredNotes int                       `json:"unstructured_notes"`
	IndexStatus       string                    `json:"index_status"`
	// Embeddings is a pointer with omitempty: a nil value drops the key.
	Embeddings        *DoctorEmbeddings         `json:"embeddings,omitempty"`
	// Types is the per-type breakdown, populated by the cmd layer. An empty
	// map is omitted from JSON.
	Types             map[string]StatusTypeInfo `json:"types,omitempty"`
	// ValidationSummary is the RAW schema-validation aggregate. nil means
	// "validation not run"; a non-nil zero value means "ran, found nothing".
	ValidationSummary *StatusIssuesSummary      `json:"validation_summary,omitempty"`
	// Issues is the surfaced/actionable set — a different axis from
	// ValidationSummary.
	Issues            DoctorIssues              `json:"issues"`

	// MeshIdentity is the Contract-B identity-health section. It is a POINTER
	// with omitempty so it is ABSENT from --json unless a mesh signal exists
	// (identity key file present, an anchor exists, a --mesh-* flag passed, or
	// the daemon is reachable). Populated by the cmd layer (paths come from xdg +
	// flags + env). nil ⇒ no mesh substrate, the section is omitted entirely.
	MeshIdentity *DoctorMeshIdentity `json:"mesh_identity,omitempty"`
}

DoctorResult is the JSON-serializable output of the doctor command.

Types and ValidationSummary carry the per-type breakdown and raw validation aggregate that `vault status` used to produce. doctor is now the single health hub, so the cold-start view (`doctor --summary`) and the read-only diagnosis (`doctor`) both surface them. They are populated by the cmd layer (which holds the vault config + schema registry) via the shared status.go helpers — the same place HookDrift is populated — so query.Doctor's signature stays stable.

ValidationSummary is a POINTER with omitempty so the unmeasured state (a raw query.Doctor call with no schema registry — e.g. the query-layer tests) omits the field entirely. The earlier non-pointer struct always serialized a false-zero {errors:0,warnings:0}, indistinguishable from a measured-healthy vault. nil now means "validation not run"; a non-nil &{0,0} means "ran, found nothing". Types likewise stays omitempty (empty map => omitted).

ValidationSummary (JSON: "validation_summary") holds the RAW schema-validation aggregate — all ValidateResult.Issues bucketed by severity. It may be larger than the surfaced result.issues set because it includes unknown_type / invalid_status findings the text report never renders as per-item lines. These are distinct labeled axes: validation_summary is the raw aggregate; result.issues is the surfaced/actionable set.

func Doctor ¶

func Doctor(db *index.DB, vaultPath string, reg *schema.Registry) (*DoctorResult, error)

Doctor runs vault health diagnostics against the indexed database.

When reg is non-nil, the missing_required_fields counter is populated by running the schema validator and summing missing-field issues. When reg is nil, the counter stays 0 (caller hasn't loaded a registry — usually a misconfiguration). The signature was extended 2026-05-04 to close the silent-failure shape where MissingRequiredFields was a declared output that never got populated.

type DoctorRollup ¶ added in v0.1.11

// DoctorRollup summarizes the health of every vault discovered under the
// root; it is the top-of-output signal an operator scans before reading the
// per-vault detail.
type DoctorRollup struct {
	// VaultCount is an alias of Discovered, retained for backward
	// compatibility with existing JSON consumers.
	VaultCount int `json:"vault_count"`
	// Discovered is the total number of vaults found (Diagnosed + Failed).
	Discovered int `json:"discovered"`
	// Diagnosed is how many vaults produced a full report.
	Diagnosed  int `json:"diagnosed"`
	// Failed is how many vaults could not be opened.
	Failed     int `json:"failed"`
	// TotalNotes is the total note count across the workspace.
	TotalNotes int `json:"total_notes"`
	// TotalErrors/TotalWarnings are the SURFACED set — the sum of each vault's
	// query.ResultSurfacedIssueCounts, the same axis each per-vault report
	// prints — so the rollup totals reconcile with summing the per-vault
	// reports. They deliberately do NOT count the raw schema-validation
	// aggregate (see TotalRawValidationFindings), which is a different axis.
	TotalErrors   int `json:"total_errors"`
	TotalWarnings int `json:"total_warnings"`
	// TotalRawValidationFindings is the RAW schema-validation aggregate summed
	// across vaults: each vault's ValidationSummary.Errors+Warnings (nil ⇒ 0).
	// It is a DIFFERENT axis from TotalErrors/TotalWarnings — it includes
	// unknown_type / invalid_status findings the per-vault text report never
	// surfaces as per-item lines. Kept under its own distinctly-labeled key so
	// the two axes are explicit rather than collapsed, mirroring how the
	// single-vault envelope keeps validation_summary alongside the surfaced set.
	TotalRawValidationFindings int `json:"total_raw_validation_findings"`

	// VaultsWithIssues lists the paths of vaults with errors or warnings, in the
	// order the vaults were diagnosed (already deterministic — discovery sorts
	// them). Always non-nil so JSON consumers see [] for a clean workspace
	// rather than null.
	VaultsWithIssues []string `json:"vaults_with_issues"`
	// contains filtered or unexported fields
}

DoctorRollup summarizes the health of every vault discovered under the root: how many vaults, the total note count, the combined error/warning counts, and the paths of vaults that have at least one error or warning. It is the top-of-output signal an operator scans before reading per-vault detail.

The count fields are kept honest so a vault that failed to open can never hide behind a lower number: Discovered is the total found (Diagnosed + Failed), Diagnosed is how many produced a full report, and Failed is how many could not be opened. VaultCount is retained as an alias of Discovered for backward compatibility with existing JSON consumers.

func BuildDoctorRollup ¶ added in v0.1.11

func BuildDoctorRollup(vaults []*DoctorResult, failed []FailedVault) DoctorRollup

BuildDoctorRollup aggregates per-vault DoctorResults into a workspace rollup. The headline TotalErrors/TotalWarnings sum each vault's SURFACED counts (ResultSurfacedIssueCounts — the same axis every per-vault report prints), so `doctor --all` totals reconcile with summing the per-vault reports. A vault counts as "having issues" iff its surfaced errors+warnings > 0, keyed on the same axis. The RAW schema-validation aggregate is not discarded: it is summed separately into TotalRawValidationFindings (each vault's ValidationSummary.Errors+Warnings; a nil ValidationSummary — a raw, un-validated result — contributes zero and never panics). The failed slice (vaults that could not be opened) is folded into the honest count breakdown — Discovered = diagnosed + failed — so the rollup never under-reports the number of vaults found. Pure aggregation: no I/O, no mutation of inputs.

func (DoctorRollup) RawValidationGap ¶ added in v0.2.0

func (r DoctorRollup) RawValidationGap() int

RawValidationGap reports how many RAW schema-validation findings are NOT surfaced as per-item text lines across the workspace — the workspace-level analogue of the single-vault gap line. It is TotalRawValidationFindings minus the surfaced validation findings (MissingRequiredFields + BrokenReferences), never the full surfaced headline (which folds in non-validation items and would under-report the gap). A non-positive result means nothing is hidden.

type EmbeddingRetriever ¶

// EmbeddingRetriever ranks notes by cosine similarity between the embedded
// query and the stored note embeddings.
type EmbeddingRetriever struct {
	// DB is the index database handle.
	// NOTE(review): presumably where the stored embeddings are read from — confirm.
	DB       *index.DB
	// Embedder embeds the query text at search time.
	Embedder embedding.Embedder
}

EmbeddingRetriever searches by cosine similarity between query and stored note embeddings.

func (*EmbeddingRetriever) Search ¶

func (r *EmbeddingRetriever) Search(ctx context.Context, query string, limit, offset int, filters index.SearchFilters) ([]retrieval.ScoredResult, int, error)

Search embeds the query, computes cosine similarity against all stored embeddings, and returns the top results sorted by score descending.

type FTSRetriever ¶

// FTSRetriever adapts SQLite FTS5 search to the Retriever interface.
type FTSRetriever struct {
	// DB is the index database the FTS5 search runs against.
	DB *index.DB
}

FTSRetriever wraps SQLite FTS5 search as a Retriever.

func (*FTSRetriever) Search ¶

func (r *FTSRetriever) Search(_ context.Context, query string, limit, offset int, filters index.SearchFilters) ([]retrieval.ScoredResult, int, error)

Search runs FTS5 search and count, converting results to ScoredResult.

type FailedVault ¶ added in v0.1.11

// FailedVault names a discovered vault that could not be opened or diagnosed,
// so it is reported rather than silently skipped.
type FailedVault struct {
	// VaultPath is the path of the vault that failed.
	VaultPath string `json:"vault_path"`
	// Error is the reason the vault could not be opened or diagnosed.
	Error     string `json:"error"`
}

FailedVault names a discovered vault that could not be opened or diagnosed, together with the reason. It is the surfaced form of what `doctor --all` used to silently skip: an operator (human or JSON consumer) sees the path and the error instead of the vault vanishing.

type GitStatusResult ¶

// GitStatusResult is the JSON response for the git status command.
//
// None of the fields use omitempty, so every key is always present; a nil
// file slice encodes as JSON null rather than [].
type GitStatusResult struct {
	// NOTE(review): presumably false when the vault path is not inside a git
	// repository — confirm what the remaining fields hold in that case.
	RepoDetected     bool     `json:"repo_detected"`
	Branch           string   `json:"branch"`
	Detached         bool     `json:"detached"`
	MergeInProgress  bool     `json:"merge_in_progress"`
	RebaseInProgress bool     `json:"rebase_in_progress"`
	WorkingTreeClean bool     `json:"working_tree_clean"`
	StagedFiles      []string `json:"staged_files"`
	UnstagedFiles    []string `json:"unstaged_files"`
	UntrackedFiles   []string `json:"untracked_files"`
}

GitStatusResult is the JSON response for the git status command.

func GitStatus ¶

func GitStatus(detector git.RepoStateDetector, vaultPath string) (*GitStatusResult, error)

GitStatus detects git repository state and returns the result.

type HybridRetriever ¶

// HybridRetriever fuses the results of N named sub-retrievers using
// Reciprocal Rank Fusion.
type HybridRetriever struct {
	// Retrievers are the named sub-retrievers; the names let each result
	// report which sub-retriever contributed what.
	Retrievers []retrieval.NamedRetriever
	K          int // RRF smoothing constant; zero-value falls back to DefaultRRFK
}

HybridRetriever fuses results from N retrievers using Reciprocal Rank Fusion. Sub-retrievers are named so each note's Components map reports which sub-retriever contributed what — useful for studying the 4-way RRF contribution ("what did FTS add here vs dense?") during research.

Fusion uses mean-of-present RRF: a note's score is the mean of its 1/(K+rank) contributions across the lanes where it appeared. Absence from a lane is treated as missing data, not a zero score. This is the fix for the 2026-04-24 partial-coverage compression bug where newly-added notes missing sparse/colbert embeddings lost to ubiquitous-mediocre competitors even when they were rank 1 where they scored.

In small vaults (fetchLimit ≥ note count) this is unambiguously correct: absence from a lane's results means the modality wasn't computed for that note. In very large vaults (where fetchLimit truncates a lane's tail) absence could also mean "ranked below cutoff" — a different failure mode worth separate attention, not solved here.

func (*HybridRetriever) Search ¶

func (h *HybridRetriever) Search(ctx context.Context, query string, limit, offset int, filters index.SearchFilters) ([]retrieval.ScoredResult, int, error)

Search runs all retrievers concurrently, then fuses their ranked lists via RRF.

type InResult ¶ added in v0.1.11

// InResult is the JSON-serializable payload for an inbound-links (backlinks)
// query.
type InResult struct {
	// TargetID is the queried note.
	TargetID string         `json:"target_id"`
	// Links are the edges pointing at TargetID.
	Links    []graph.InLink `json:"links"`
}

InResult is the JSON-serializable payload for an inbound-links (backlinks) query. TargetID is the queried note; Links are the edges pointing at it.

func CollectIn ¶ added in v0.1.11

func CollectIn(db *index.DB, noteID, edgeType string) (InResult, error)

CollectIn returns the inbound-links (backlinks) payload for noteID WITHOUT rendering.

type IncompatibleLink ¶

// IncompatibleLink describes a wikilink that resolves in VaultMind but not in
// Obsidian, which matches [[target]] against filenames without extension.
type IncompatibleLink struct {
	// SourceID and SourcePath identify the note containing the link.
	SourceID     string `json:"source_id"`
	SourcePath   string `json:"source_path"`
	// TargetRaw is the link target as written.
	// NOTE(review): presumably the edge's dst_raw value — confirm.
	TargetRaw    string `json:"target_raw"`
	// NOTE(review): SuggestedFix is presumably the filename stem Obsidian
	// would resolve — confirm against the detector.
	SuggestedFix string `json:"suggested_fix"`
}

IncompatibleLink describes a wikilink that resolves in VaultMind but not in Obsidian. Obsidian resolves [[target]] by matching target against filenames (without extension). If dst_raw uses a note's title instead of its filename stem, Obsidian won't find it.

type LinksConfig ¶

// LinksConfig holds the parameters for a links query.
type LinksConfig struct {
	// Input is the note reference to resolve before collecting links.
	Input      string
	Direction  string // "out", "in", or "both"
	// NOTE(review): EdgeType presumably filters links by edge type; the
	// meaning of an empty value is not visible here — confirm.
	EdgeType   string
	JSONOutput bool
	VaultPath  string
	IndexHash  string
}

LinksConfig holds parameters for links operations in the out, in, or both directions.

type MeasuredCalibration ¶

// MeasuredCalibration is the raw per-vault calibration measured from the
// embedding space, before the cmd layer stamps it with an id and timestamp.
type MeasuredCalibration struct {
	// NoiseFloor is N: the max cosine any of the fixed off-topic probe strings
	// reaches against any note.
	NoiseFloor       float64
	// NOTE(review): presumably the number of probe strings used and the
	// version of the fixed probe set — confirm.
	NoiseFloorProbes int
	ProbeSetVersion  int
	// NTNCosineMu and NTNCosineSigma describe the note-to-note cosine
	// dispersion.
	// NOTE(review): presumably mean and standard deviation over
	// NTNSampleCount samples — confirm.
	NTNCosineMu      float64
	NTNCosineSigma   float64
	NTNSampleCount   int
	NoteCount        int
	EmbeddingDims    int
}

MeasuredCalibration is the raw per-vault calibration measured from the embedding space. The cmd layer stamps it with an id + timestamp and stores it as an experiment.CalibrationSnapshot.

func MeasureNoiseFloor ¶

func MeasureNoiseFloor(ctx context.Context, embedder embedding.Embedder, db *index.DB) (*MeasuredCalibration, error)

MeasureNoiseFloor measures a vault's noise floor N and note-to-note cosine dispersion from its stored dense embeddings. N is the max cosine any of the fixed off-topic probe strings reaches against any note — the cosine an off-domain query gets, which the relevance metric R = top_cosine - N is measured against. Requires an embedder (for the probes) and embedded notes.

type MeshDaemonClient ¶ added in v0.2.0

// MeshDaemonClient is the loopback-pinned daemon-probe seam used by the mesh
// doctor checks. doctorclient.Client satisfies it.
type MeshDaemonClient interface {
	// FetchRoot returns the daemon's well-known root.
	FetchRoot(ctx context.Context) (doctorclient.WellKnownRoot, error)
	// FetchDirectory returns the directory as raw bytes.
	// NOTE(review): presumably the registry used when no offline override is
	// supplied — confirm.
	FetchDirectory(ctx context.Context) ([]byte, error)
	// NOTE(review): the meaning of Whoami's bool and string results is not
	// visible here — confirm against doctorclient.
	Whoami(ctx context.Context) (bool, string, error)
}

MeshDaemonClient is the loopback-pinned daemon-probe seam. doctorclient.Client satisfies this.

type MeshDoctorInput ¶ added in v0.2.0

// MeshDoctorInput carries everything BuildMeshIdentity needs. The cmd layer
// resolves the paths, the anchor pin and the slug, and constructs the signer
// and the loopback daemon client.
type MeshDoctorInput struct {
	// Tier-1 custody (STAT-ONLY — never read).
	KeyPath    string
	SocketPath string

	// Tier-2 authenticity.
	PinnedRootPub ed25519.PublicKey // nil ⇒ UNPINNED path (never green)
	NetworkID     string
	RegistryBytes []byte // offline registry override (--mesh-registry); else fetched
	Slug          string
	Signer        MeshSigner // keyless proof-of-possession

	// Tier-3 reachability.
	Daemon        MeshDaemonClient
	// NOTE(review): HeartbeatPath is presumably the watcher heartbeat file
	// whose age is reported — confirm.
	HeartbeatPath string

	// Clock seam.
	// NOTE(review): presumably injected so time-dependent checks are
	// deterministic under test — confirm.
	Now time.Time
}

MeshDoctorInput carries everything BuildMeshIdentity needs. The cmd layer resolves paths (xdg + flags + env), the anchor pin, the slug, and constructs the signer + loopback daemon client.

type MeshSigner ¶ added in v0.2.0

// MeshSigner is the keyless signing seam: doctor asks the signer to sign a
// challenge and never reads the private key file itself. signer.Client
// satisfies it.
type MeshSigner interface {
	// Sign returns a signature over canonicalBytes.
	Sign(canonicalBytes []byte) ([]byte, error)
}

MeshSigner is the keyless signing seam: doctor asks the SIGNER (over the UDS) to sign a challenge; it NEVER reads the private key file itself. signer.Client satisfies this.

type MgetResult ¶

// MgetResult is the response for batch note reads.
type MgetResult struct {
	// Notes are the fetched notes; in frontmatter-only mode their
	// body/headings/blocks are omitted.
	Notes    []index.FullNote `json:"notes"`
	// NOTE(review): NotFound presumably lists requested IDs with no matching
	// note — confirm.
	NotFound []string         `json:"not_found"`
	// NOTE(review): whether Total counts requested IDs or found notes is not
	// visible here — confirm.
	Total    int              `json:"total"`
}

MgetResult is the response for batch note reads.

func Mget ¶

func Mget(db *index.DB, ids []string, frontmatterOnly bool) (*MgetResult, error)

Mget fetches multiple notes by ID. If frontmatterOnly is true, body/headings/blocks are omitted from each note.

type NeighborEdge ¶

// NeighborEdge describes the edge by which a neighbor was reached.
type NeighborEdge struct {
	// SourceID is the note on the originating side of the edge.
	// NOTE(review): presumably the node from which the traversal arrived — confirm.
	SourceID   string  `json:"source_id"`
	EdgeType   string  `json:"edge_type"`
	// NOTE(review): Confidence is a string label; its allowed values are not
	// visible here — confirm.
	Confidence string  `json:"confidence"`
	Weight     float64 `json:"weight"`
}

NeighborEdge describes the edge by which a neighbor was reached.

type NeighborNode ¶

// NeighborNode represents a single node in the neighbors result.
type NeighborNode struct {
	ID       string        `json:"id"`
	// NOTE(review): Distance is presumably the BFS hop count from the start
	// node — confirm.
	Distance int           `json:"distance"`
	// EdgeFrom is the edge by which this node was reached; a nil pointer is
	// dropped from JSON (omitempty).
	EdgeFrom *NeighborEdge `json:"edge_from,omitempty"`
}

NeighborNode represents a single node in the neighbors result.

type NeighborsResult ¶

// NeighborsResult holds the result of a neighbors traversal query.
type NeighborsResult struct {
	// StartID is the node the traversal started from.
	// NOTE(review): presumably the resolved ID of the caller's input — confirm.
	StartID         string         `json:"start_id"`
	Nodes           []NeighborNode `json:"nodes"`
	// NOTE(review): MaxNodesReached presumably signals that the traversal was
	// cut short by the maxNodes limit — confirm.
	MaxNodesReached bool           `json:"max_nodes_reached"`
}

NeighborsResult holds the result of a neighbors traversal query.

func Neighbors ¶

func Neighbors(resolver *graph.Resolver, input string, depth int, minConfidence string, maxNodes int) (*NeighborsResult, error)

Neighbors performs a BFS traversal from the given input (resolved via the Resolver's entity resolution) and returns the result in a format suitable for JSON output.

type NoteGetConfig ¶

// NoteGetConfig holds the parameters for the note get operation.
type NoteGetConfig struct {
	// Input is the note reference supplied by the caller.
	// NOTE(review): accepted forms (ID, title, path) are not visible here — confirm.
	Input           string
	FrontmatterOnly bool
	JSONOutput      bool
	VaultPath       string
}

NoteGetConfig holds parameters for the note get operation.

type OutResult ¶ added in v0.1.11

// OutResult is the JSON-serializable payload for an outbound-links query.
type OutResult struct {
	// SourceID is the queried note.
	SourceID string          `json:"source_id"`
	// Links are the edges leaving SourceID.
	Links    []graph.OutLink `json:"links"`
}

OutResult is the JSON-serializable payload for an outbound-links query. SourceID is the queried note; Links are the edges leaving it.

func CollectOut ¶ added in v0.1.11

func CollectOut(db *index.DB, noteID, edgeType string) (OutResult, error)

CollectOut returns the outbound-links payload for noteID WITHOUT rendering. Split from rendering so the cmd layer can aggregate directions into one envelope for the `--both` JSON path.

type SearchConfig ¶

// SearchConfig holds the search parameters.
type SearchConfig struct {
	Query      string
	// Limit and Offset page through the result list.
	Limit      int
	Offset     int
	// NOTE(review): TypeFilter and TagFilter presumably narrow results by
	// note type and tag; empty-value semantics are not visible here — confirm.
	TypeFilter string
	TagFilter  string
	JSONOutput bool
	VaultPath  string
}

SearchConfig holds search parameters.

type SearchResult ¶

// SearchResult is the JSON response for search.
type SearchResult struct {
	Query  string                   `json:"query"`
	Offset int                      `json:"offset"`
	Limit  int                      `json:"limit"`
	// Hits has no omitempty, so a nil slice encodes as JSON null rather than [].
	Hits   []retrieval.ScoredResult `json:"hits"`
	// NOTE(review): Total is presumably the full match count independent of
	// Limit/Offset — confirm.
	Total  int                      `json:"total"`
}

SearchResult is the JSON response for search.

func RunSearch ¶

func RunSearch(retriever retrieval.Retriever, cfg SearchConfig, w io.Writer) (*SearchResult, error)

RunSearch executes the search command logic and returns the result for downstream use (e.g. experiment logging). Rendering is still written to w.

type SelfConfig ¶

// SelfConfig holds the parameters for the self-introspection view. Zero-value
// fields are filled in by SelfDefaults.
type SelfConfig struct {
	Limit          int
	StaleThreshold time.Duration
	// NOTE(review): DecayD is presumably the decay parameter of the
	// activation model — confirm.
	DecayD         float64
	// NOTE(review): Now is presumably the reference time for staleness and
	// decay calculations — confirm.
	Now            time.Time
}

SelfConfig holds parameters for the self-introspection view.

func SelfDefaults ¶

func SelfDefaults(cfg SelfConfig) SelfConfig

SelfDefaults fills zero-value SelfConfig fields with sensible defaults.

type SparseEmbedFunc ¶

// SparseEmbedFunc produces a sparse vector for a query string.
// NOTE(review): the map presumably goes from token/vocabulary index to
// weight — confirm against the embedder implementation.
type SparseEmbedFunc func(ctx context.Context, text string) (map[int32]float32, error)

SparseEmbedFunc produces a sparse vector for a query string.

type SparseRetriever ¶

// SparseRetriever ranks notes by sparse dot-product between the query vector
// and the stored sparse embeddings.
type SparseRetriever struct {
	// DB is the index database handle.
	// NOTE(review): presumably where the stored sparse embeddings are read from — confirm.
	DB          *index.DB
	// EmbedSparse turns the query text into a sparse vector.
	EmbedSparse SparseEmbedFunc
}

SparseRetriever searches by sparse dot-product between query and stored sparse embeddings.

func (*SparseRetriever) Search ¶

func (r *SparseRetriever) Search(ctx context.Context, query string, limit, offset int, filters index.SearchFilters) ([]retrieval.ScoredResult, int, error)

Search embeds the query as a sparse vector, computes dot-product similarity against all stored sparse embeddings, and returns the top results sorted by score descending.

type StatusIssuesSummary ¶

// StatusIssuesSummary holds aggregated issue counts. It is the rollup
// returned by SummarizeValidationIssues and embedded in StatusResult.
type StatusIssuesSummary struct {
	// Errors is the error count, serialized as "errors".
	Errors   int `json:"errors"`
	// Warnings is the warning count, serialized as "warnings".
	Warnings int `json:"warnings"`
}

StatusIssuesSummary holds aggregated issue counts.

func SummarizeValidationIssues ¶ added in v0.1.11

func SummarizeValidationIssues(db *index.DB, reg *schema.Registry) (StatusIssuesSummary, error)

SummarizeValidationIssues runs the schema validator and rolls its issues up into error/warning counts. Shared by `vault status` and `doctor` so both report the same rollup (SSOT). A nil reg yields a zero-value summary.

type StatusResult ¶

// StatusResult is the JSON response for vault status, as returned by
// VaultStatus. Every field is always serialized (no omitempty tags).
type StatusResult struct {
	// VaultPath presumably echoes the vaultPath argument of VaultStatus — confirm.
	VaultPath         string                    `json:"vault_path"`
	// NOTE(review): how TotalFiles relates to DomainNotes and
	// UnstructuredNotes (e.g. whether they sum to it) is not visible here — verify.
	TotalFiles        int                       `json:"total_files"`
	DomainNotes       int                       `json:"domain_notes"`
	UnstructuredNotes int                       `json:"unstructured_notes"`
	// IndexStatus is a free-form string here; its possible values are not
	// visible in this view — TODO document once confirmed.
	IndexStatus       string                    `json:"index_status"`
	// IndexStale presumably reports that the index lags the files on disk — confirm.
	IndexStale        bool                      `json:"index_stale"`
	// Types has the same shape CollectTypeBreakdown returns; keys are
	// presumably note type names — confirm.
	Types             map[string]StatusTypeInfo `json:"types"`
	// IssuesSummary carries error/warning counts; presumably filled from
	// SummarizeValidationIssues — confirm.
	IssuesSummary     StatusIssuesSummary       `json:"issues_summary"`
}

StatusResult is the JSON response for `vault status`.

func VaultStatus ¶

func VaultStatus(db *index.DB, vaultPath string, cfg *vault.Config, reg *schema.Registry) (*StatusResult, error)

VaultStatus combines doctor, schema, and validation into a single cold-start response.

type StatusTypeInfo ¶

// StatusTypeInfo describes a note type with its count and schema.
type StatusTypeInfo struct {
	// Count is presumably the number of indexed notes of this type — confirm.
	Count    int      `json:"count"`
	// Required presumably lists the frontmatter fields the schema requires
	// for this type — confirm against schema.Registry.
	Required []string `json:"required"`
	// Statuses presumably lists the status values the schema allows for
	// this type — confirm against schema.Registry.
	Statuses []string `json:"statuses"`
}

StatusTypeInfo describes a note type with its count and schema.

type UnresolvedLink ¶

// UnresolvedLink describes a single unresolved link with source and
// target info.
type UnresolvedLink struct {
	// SourceID and SourcePath presumably identify the note containing the
	// link (its ID and file path) — confirm.
	SourceID   string `json:"source_id"`
	SourcePath string `json:"source_path"`
	// TargetRaw is presumably the link target exactly as written in the
	// source note, since it could not be resolved — confirm.
	TargetRaw  string `json:"target_raw"`
}

UnresolvedLink describes a single unresolved link with source and target info.

type ValidateIssue ¶

// ValidateIssue represents a single validation finding.
type ValidateIssue struct {
	// Path and ID presumably identify the offending note (file path and
	// note ID) — confirm.
	Path     string `json:"path"`
	ID       string `json:"id"`
	// NOTE(review): Severity presumably distinguishes errors from warnings
	// (the rollup counts both); the exact string values are not visible
	// here — verify.
	Severity string `json:"severity"`
	// Rule presumably holds the rule name. Rule names mentioned in this
	// package's docs: unknown_type, missing_required_field, invalid_status,
	// invalid_frontmatter, broken_reference — confirm.
	Rule     string `json:"rule"`
	Message  string `json:"message"`
	// Field and Value are omitted from the JSON output when empty.
	Field    string `json:"field,omitempty"`
	Value    string `json:"value,omitempty"`
}

ValidateIssue represents a single validation finding.

type ValidateResult ¶

// ValidateResult is the JSON-serializable output of frontmatter validate,
// as returned by Validate and ValidateLive.
type ValidateResult struct {
	// FilesChecked is presumably the number of notes/files examined — confirm.
	FilesChecked int             `json:"files_checked"`
	// NOTE(review): Valid is presumably the count of files with no issues;
	// whether warning-only files count as valid is not visible here — verify.
	Valid        int             `json:"valid"`
	// Issues has no omitempty tag, so a nil slice would serialize as
	// JSON null rather than [].
	Issues       []ValidateIssue `json:"issues"`
}

ValidateResult is the JSON-serializable output of frontmatter validate.

func Validate ¶

func Validate(db *index.DB, reg *schema.Registry) (*ValidateResult, error)

Validate runs all frontmatter validation rules against the indexed notes.

func ValidateLive ¶

func ValidateLive(vaultPath string, reg *schema.Registry) (*ValidateResult, error)

ValidateLive walks vaultPath, parses each .md file's frontmatter, and runs schema rules against the live files on disk. It does NOT require an index.

Rules evaluated: unknown_type, missing_required_field, invalid_status. Unparseable frontmatter is reported as invalid_frontmatter. The broken_reference rule is skipped — it requires the full link graph, which only the indexer produces.

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL