graph

package

v0.8.0 Latest Latest Go to latest Published: Mar 29, 2026 License: MIT Imports: 24 Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/SynapsesOS/synapses

Links

Open Source Insights

Documentation ¶

Overview ¶

Package graph implements the core in-memory graph engine for Synapses. It stores code entities (nodes) and their relationships (edges), and provides BFS-based context carving with edge-type-weighted relevance decay.

Index ¶

Constants
Variables
func CrossDomainCategory(et EdgeType) string
func ExportDOT(nodes []*Node, edges []*Edge, repoRoot string, includeMeta bool) string
func ExportGraphML(nodes []*Node, edges []*Edge, repoRoot string) string
func ExportMermaid(nodes []*Node, edges []*Edge, repoRoot string, includeMeta bool) string
func IntentCarveWeights(intent string) map[EdgeType]float64
func IntentDirectionBoost(intent string) float64
func IsCrossDomainEdge(et EdgeType) bool
func NodeTypeToUint8(nt NodeType) uint8
func Serialize(fg *FlatGraph, w io.Writer) error
type APISurfaceInfo
type BulkEdge
type CallSite
type CarveConfig
- func DefaultCarveConfig() CarveConfig
type CarvedNode
type CrossDomainContext
- func (c *CrossDomainContext) IsEmpty() bool
type CrossDomainRef
type DomainType
type Edge
type EdgeIndex
type EdgeType
type EdgeTypeDescriptor
- func GetEdgeTypes() []EdgeTypeDescriptor
type EdgeWeightKey
type EntityInfo
type EntityRef
type ErrNodeNotFound
- func (e ErrNodeNotFound) Error() string
type FlatGraph
- func Deserialize(r io.Reader) (*FlatGraph, error)
- func NewFlatGraph(repoID string) *FlatGraph
- func (fg *FlatGraph) AddNode(name StringID, nodeType NodeType, fileID StringID, nsID uint16) NodeIndex
- func (fg *FlatGraph) BulkAddEdges(edges []BulkEdge) int
- func (fg *FlatGraph) ExtID(idx NodeIndex) NodeID
- func (fg *FlatGraph) LookupIndex(id NodeID) (NodeIndex, bool)
- func (fg *FlatGraph) Neighbors(idx NodeIndex) []NodeIndex
- func (fg *FlatGraph) NodeIDAt(idx NodeIndex) NodeID
type Graph
- func New(repoID string) *Graph
- func (g *Graph) AddCallSite(cs CallSite)
- func (g *Graph) AddEdge(e *Edge)
- func (g *Graph) AddImportAlias(file, alias, importPath string)
- func (g *Graph) AddInstantiatedType(file, typeName string)
- func (g *Graph) AddNode(n *Node)
- func (g *Graph) AddTerraformRef(ref TerraformRef)
- func (g *Graph) AddVarType(file, varName, typeName string)
- func (g *Graph) AllEdges() []*Edge
- func (g *Graph) AllNodes() []*Node
- func (g *Graph) BulkAddCallSites(sites []CallSite)
- func (g *Graph) CacheLen() int
- func (g *Graph) CarveEgoGraph(rootID NodeID, cfg CarveConfig) (*SubGraph, error)
- func (g *Graph) ClearFileSnapshot(file string)
- func (g *Graph) Compact()
- func (g *Graph) CrossDomainImpactForNode(nodeID NodeID) ([]CrossDomainRef, bool)
- func (g *Graph) CrossRepoCalls(primaryRepoID string) (crossCallCount int, linkedRepos []string)
- func (g *Graph) DirectNeighbors(id NodeID) []NodeID
- func (g *Graph) DrainCallSites() []CallSite
- func (g *Graph) DrainTerraformRefs() []TerraformRef
- func (g *Graph) EdgeCount() int
- func (g *Graph) EdgeCountsByType() map[EdgeType]int
- func (g *Graph) EdgesForFile(file string) []*Edge
- func (g *Graph) EnableFlatGraph()
- func (g *Graph) Fanin(id NodeID) int
- func (g *Graph) Fanout(id NodeID) int
- func (g *Graph) FindByFile(filePath string) []*Node
- func (g *Graph) FindByName(name string) []*Node
- func (g *Graph) FindByPattern(pattern string) []*Node
- func (g *Graph) FindByPatternLimit(pattern string, limit int) []*Node
- func (g *Graph) FindByType(t NodeType) []*Node
- func (g *Graph) FindTestsFor(nodeID NodeID) []string
- func (g *Graph) FindTestsWithDistance(nodeID NodeID) []TestRef
- func (g *Graph) GetImportAliases(file string) map[string]string
- func (g *Graph) GetInstantiatedTypes() map[string]bool
- func (g *Graph) GetNode(id NodeID) *Node
- func (g *Graph) GetVarTypes(file string) map[string]string
- func (g *Graph) HasEdge(from, to NodeID, edgeType EdgeType) bool
- func (g *Graph) ImpactAnalysis(rootID NodeID, maxDepth int) (*ImpactResult, error)
- func (g *Graph) InEdges(id NodeID) []*Edge
- func (g *Graph) InEdgesForFile(file string) []*Edge
- func (g *Graph) Index() *GraphIndex
- func (g *Graph) InvalidateCache()
- func (g *Graph) InvalidateCacheForFile(file string)
- func (g *Graph) MakeNodeID(file, name string) NodeID
- func (g *Graph) MergeFrom(other *Graph)
- func (g *Graph) MigrateStableID(n *Node)
- func (g *Graph) NodeCount() int
- func (g *Graph) NodeCountsByDomain() map[DomainType]int
- func (g *Graph) NodesForFile(file string) []*Node
- func (g *Graph) OutEdges(id NodeID) []*Edge
- func (g *Graph) OutEdgesForFile(file string) []*Edge
- func (g *Graph) PeekCallSites() []CallSite
- func (g *Graph) ProjectIdentity() *ProjectIdentity
- func (g *Graph) RebuildIndex() ([]byte, error)
- func (g *Graph) RemoveCallSitesForFile(file string)
- func (g *Graph) RemoveEdge(from, to NodeID, edgeType EdgeType)
- func (g *Graph) RemoveFile(file string)
- func (g *Graph) RemoveTerraformRefsForFile(file string)
- func (g *Graph) RepoID() string
- func (g *Graph) Root() string
- func (g *Graph) SetFileProvenance(filePath string, p ProvenanceType)
- func (g *Graph) SetIndex(idx *GraphIndex)
- func (g *Graph) SetRoot(root string)
- func (g *Graph) SnapshotCallsAdjacency() map[NodeID][]NodeID
- func (g *Graph) SnapshotEdgesAndNodes() (map[NodeID][]*Edge, map[NodeID]*Node)
- func (g *Graph) SnapshotFileStableIDs(file string)
- func (g *Graph) SnapshotImportAdjacency() (map[NodeID][]NodeID, map[NodeID]*Node)
- func (g *Graph) SnapshotImportAliases() map[string]map[string]string
- func (g *Graph) SuggestRules() []SuggestedRule
- func (g *Graph) ToFlatGraph() *FlatGraph
- func (g *Graph) UpdateFileNodeMetadata(absFile string, update func(n *Node))
- func (g *Graph) UpdateNodeMetadata(id NodeID, update func(n *Node))
- func (g *Graph) UpsertRouteNode(n *Node) bool
type GraphIndex
- func LoadSnapshot(data []byte, pool *StringPool) (*GraphIndex, error)
- func (idx *GraphIndex) InNeighbours(seq uint32) (sources []uint32, types []StringID)
- func (idx *GraphIndex) IsTombstoned(seq uint32) bool
- func (idx *GraphIndex) MarkTombstone(seq uint32)
- func (idx *GraphIndex) NodeFile(seq uint32) string
- func (idx *GraphIndex) NodeName(seq uint32) string
- func (idx *GraphIndex) OutNeighbours(seq uint32) (targets []uint32, types []StringID)
- func (idx *GraphIndex) Ready() bool
- func (idx *GraphIndex) ReceiverMethodSeqs(receiverName string) []uint32
- func (idx *GraphIndex) SaveSnapshot() ([]byte, error)
- func (idx *GraphIndex) Seq(nid NodeID) uint32
- func (idx *GraphIndex) TombstoneRatio() float64
- func (idx *GraphIndex) UnsafeInNeighbours(seq uint32) (sources []uint32, types []StringID)
- func (idx *GraphIndex) UnsafeIsTombstoned(seq uint32) bool
- func (idx *GraphIndex) UnsafeOutNeighbours(seq uint32) (targets []uint32, types []StringID)
- func (idx *GraphIndex) UnsafeSeq(nid NodeID) uint32
type GraphSummary
type ImpactResult
type ImpactTier
type Node
type NodeID
type NodeIndex
type NodeType
type ProjectIdentity
type ProvenanceType
type QualityNode
type Scale
type StringID
type StringPool
- func NewStringPool() *StringPool
- func (p *StringPool) Intern(s string) StringID
- func (p *StringPool) Value(id StringID) string
type SubGraph
type SuggestedRule
type TerraformRef
type TestRef

Constants ¶

View Source

// Test priority constants for FindTestsWithDistance. Each label corresponds
// to the call distance noted beside it.
const (
	TestPriorityCritical   = "critical"   // distance 1: directly calls changed entity
	TestPriorityLikely     = "likely"     // distance 2: calls through one intermediate
	TestPriorityPeripheral = "peripheral" // distance 3+: transitive dependency
)

Test priority constants for FindTestsWithDistance.

View Source

// MaxPoolSize is the upper bound on the number of interned strings.
// Intern returns a ghost ID once this limit is reached, preventing
// unbounded memory growth in extremely large repositories.
const MaxPoolSize = 5_000_000

MaxPoolSize is the upper bound on the number of interned strings. Intern returns a ghost ID once this limit is reached, preventing unbounded memory growth in extremely large repositories.

View Source

// ReservedGhostRange is the number of StringIDs reserved at the beginning of
// the pool for transient or unindexed strings (Ghost Nodes).
const ReservedGhostRange = 1000

ReservedGhostRange is the number of StringIDs reserved at the beginning of the pool for transient or unindexed strings (Ghost Nodes). This prevents out-of-bounds panics when an agent requests a file that hasn't been saved to the SQLite BLOB yet.

Variables ¶

View Source

// DefaultEdgeWeights assigns each edge type its semantic significance for
// context carving: the higher the weight, the more relevant a neighbour
// reached over that edge. Entries are grouped from strongest to weakest.
// Configurable via synapses.json.
var DefaultEdgeWeights = map[EdgeType]float64{
	// 0.8 and above: the strongest relationships.
	EdgeCalls:      1.0,
	EdgeDataFlows:  0.95,
	EdgeImplements: 0.9,
	EdgeHandles:    0.9,
	EdgeEmbeds:     0.85,
	EdgeDependsOn:  0.8,

	// 0.6 to 0.75: cross-domain dependencies, imports and documentation.
	EdgeDeploys:      0.75,
	EdgeConsumes:     0.75,
	EdgeImports:      0.7,
	EdgeExplains:     0.7,
	EdgeConfiguredBy: 0.65,
	EdgeDocuments:    0.65,
	EdgeDocumentedBy: 0.6,
	EdgeContradicts:  0.6,

	// 0.3 to 0.55: name matches, exports, manual links and knowledge edges.
	EdgeMentions:   0.55,
	EdgeExports:    0.5,
	EdgeManual:     0.5,
	EdgeCausedBy:   0.5,
	EdgeInstanceOf: 0.4,
	EdgeLinksTo:    0.3,
	EdgeRelatesTo:  0.3,

	// 0.15: containment edges, kept intentionally low so that file and
	// document nodes do not become high-relevance hubs.
	EdgeContains: 0.15,
	EdgeDefines:  0.15,
}

DefaultEdgeWeights defines the semantic significance of each edge type. Higher weight = more relevant when carving context. Configurable via synapses.json.

EdgeDefines is intentionally low (0.15) because file→entity DEFINES edges would otherwise turn every file node into a high-relevance hub, giving all siblings in a file equal — and misleading — relevance scores.

View Source

// EdgeTypeCatalog is the authoritative registry of all edge types in the
// graph. Every key of DefaultEdgeWeights must have a descriptor here; the
// TestEdgeTypeCatalogCompleteness test enforces that invariant.
//
// Entries are listed in descending SemanticWeight order, and each
// SemanticWeight repeats the value stored for the same edge type in
// DefaultEdgeWeights, so the two must be edited together.
//
// NOTE(review): Synthetic is set on entries whose descriptions say they are
// created by a heuristic/background pass or by the user; presumably it marks
// edges that are not derived from the AST — confirm against
// EdgeTypeDescriptor.
var EdgeTypeCatalog = []EdgeTypeDescriptor{
	{
		Name:           EdgeCalls,
		Description:    "Function or method invocation. Direction: caller → callee. Highest BFS weight — runtime behaviour flows along CALLS edges.",
		SemanticWeight: 1.0,
		Direction:      "directed",
		Domain:         DomainCode,
	},
	{
		Name:           EdgeDataFlows,
		Description:    "Data dependency between entities: a value produced by one entity is consumed by another. Near-highest weight — data-flow edges are critical for debugging and impact analysis.",
		SemanticWeight: 0.95,
		Direction:      "directed",
		Domain:         DomainCode,
	},
	{
		Name:           EdgeImplements,
		Description:    "Struct or type implements an interface. Direction: concrete type → interface. High weight — interface compliance is central to code review and contract analysis.",
		SemanticWeight: 0.9,
		Direction:      "directed",
		Domain:         DomainCode,
	},
	{
		Name:           EdgeHandles,
		Description:    "HTTP/RPC route dispatches to a handler function. Direction: route node → handler. Injected by the R1 heuristic pass (not AST-derived). Confidence stored in route node metadata.",
		SemanticWeight: 0.9,
		Direction:      "directed",
		Domain:         DomainCode,
		Synthetic:      true,
	},
	{
		Name:           EdgeEmbeds,
		Description:    "Struct embeds another struct (Go embedding / composition). Direction: outer struct → embedded struct. High weight — embedding propagates the full method set.",
		SemanticWeight: 0.85,
		Direction:      "directed",
		Domain:         DomainCode,
	},
	{
		Name:           EdgeDependsOn,
		Description:    "Explicit dependency relationship between entities or modules. Broader than CALLS — captures package-level or declarative dependencies not visible as direct call sites.",
		SemanticWeight: 0.8,
		Direction:      "directed",
		Domain:         DomainCode,
	},
	// Cross-domain edges from code to infrastructure and API entities.
	{
		Name:           EdgeDeploys,
		Description:    "Code entity deploys an infrastructure resource. Direction: code entity → Terraform/k8s resource node. Strong cross-domain dependency — code changes may break deployed infrastructure.",
		SemanticWeight: 0.75,
		Direction:      "directed",
		Domain:         DomainInfra,
		Synthetic:      true,
	},
	{
		Name:           EdgeConsumes,
		Description:    "Code entity calls or depends on an API endpoint or service. Direction: code entity → OpenAPI endpoint / gRPC service node. Strong cross-domain dependency — API changes break consuming code.",
		SemanticWeight: 0.75,
		Direction:      "directed",
		Domain:         DomainAPI,
		Synthetic:      true,
	},
	{
		Name:           EdgeImports,
		Description:    "Source file or package imports another package. Direction: importer → imported package node. Lower weight than CALLS — import edges are structurally noisy (every file that uses a stdlib type gets an edge).",
		SemanticWeight: 0.7,
		Direction:      "directed",
		Domain:         DomainCode,
	},
	{
		Name:           EdgeExplains,
		Description:    "Documentation section describes a code entity (R31). Direction: Section node → code entity. Moderate weight — doc context is valuable but secondary to structural code edges.",
		SemanticWeight: 0.7,
		Direction:      "directed",
		Domain:         DomainDocs,
		Synthetic:      true,
	},
	{
		Name:           EdgeConfiguredBy,
		Description:    "Code entity is controlled by a configuration resource. Direction: code entity → Terraform variable / k8s ConfigMap / config file node. Cross-domain — config changes can silently break code behaviour.",
		SemanticWeight: 0.65,
		Direction:      "directed",
		Domain:         DomainInfra,
		Synthetic:      true,
	},
	{
		Name:           EdgeDocuments,
		Description:    "Documentation section describes a cross-domain entity (broader than EXPLAINS). Direction: docs section → any entity (code, infra, API). Used for README sections that describe Terraform modules or API specs.",
		SemanticWeight: 0.65,
		Direction:      "directed",
		Domain:         DomainDocs,
		Synthetic:      true,
	},
	{
		Name:           EdgeDocumentedBy,
		Description:    "Reverse of EXPLAINS: code entity references its documentation section (R31). Direction: code entity → Section node. Slightly lower than EXPLAINS so code-to-code edges are preferred under token budget pressure.",
		SemanticWeight: 0.6,
		Direction:      "directed",
		Domain:         DomainDocs,
		Synthetic:      true,
	},
	{
		Name:           EdgeContradicts,
		Description:    "Sprint 17 NL-to-graph: two entities express conflicting information (e.g. two doc sections with incompatible claims). Direction: newer/conflicting entity → established entity. Weight reflects the high signal value of detected contradictions.",
		SemanticWeight: 0.6,
		Direction:      "directed",
		Domain:         DomainKnowledge,
		Synthetic:      true,
	},
	{
		Name:           EdgeMentions,
		Description:    "Synthetic cross-domain name-match edge. Direction: any entity → any entity across domain boundary. Created by the name-matching background pass when two entities share the same identifier across domains. Confidence (0.0–1.0) stored in edge metadata; only edges with confidence ≥ 0.6 are auto-created.",
		SemanticWeight: 0.55,
		Direction:      "directed",
		Domain:         DomainKnowledge,
		Synthetic:      true,
	},
	{
		Name:           EdgeExports,
		Description:    "Module or file exports an identifier. Direction: file/module → exported symbol. Medium-low weight — captures public API surface without dominating BFS traversal.",
		SemanticWeight: 0.5,
		Direction:      "directed",
		Domain:         DomainCode,
	},
	{
		Name:           EdgeManual,
		Description:    "User-defined cross-domain relationship created via link_entities. Used when no standard edge type applies. Medium BFS weight (0.5) — traversed but lower priority than structural code edges.",
		SemanticWeight: 0.5,
		Direction:      "directed",
		Domain:         DomainCustom,
		Synthetic:      true,
	},
	{
		Name:           EdgeCausedBy,
		Description:    "Sprint 17 NL-to-graph: causal relationship between entities extracted from documentation. Direction: effect → cause (e.g. OutOfMemoryError caused_by LeakedConnection). Enables root-cause traversal in knowledge graph queries.",
		SemanticWeight: 0.5,
		Direction:      "directed",
		Domain:         DomainKnowledge,
		Synthetic:      true,
	},
	{
		Name:           EdgeInstanceOf,
		Description:    "Sprint 17 NL-to-graph: type hierarchy relationship extracted from documentation. Direction: specific → general (e.g. Redis instance_of CacheSystem). Lower weight than structural relationships — type hierarchy is contextual, not runtime-critical.",
		SemanticWeight: 0.4,
		Direction:      "directed",
		Domain:         DomainKnowledge,
		Synthetic:      true,
	},
	// NOTE(review): the description below calls this the lowest-weighted doc
	// edge, but EdgeContains is also DomainDocs and carries 0.15. The text is
	// a runtime string, so it is left untouched here.
	{
		Name:           EdgeLinksTo,
		Description:    "Markdown cross-document link (R31). Direction: source document/section → target document node. Lowest semantic weight among doc edges — navigation structure, not content relationship.",
		SemanticWeight: 0.3,
		Direction:      "directed",
		Domain:         DomainDocs,
		Synthetic:      true,
	},
	{
		Name:           EdgeRelatesTo,
		Description:    "Sprint 17 NL-to-graph: generic fallback relationship between knowledge entities when no more specific type applies. Direction: source entity → related entity. Created by Tier 0 heuristic extraction; may be upgraded to a typed edge by Tier 2 LLM classification.",
		SemanticWeight: 0.3,
		Direction:      "directed",
		Domain:         DomainKnowledge,
		Synthetic:      true,
	},
	// NOTE(review): "a subsections" in the description below is a typo; it
	// lives in a runtime string, so it is left untouched here.
	{
		Name:           EdgeContains,
		Description:    "Document file contains a section, or parent section contains a subsections (R31). Direction: doc file/section → child section. Structural edge — same intentionally low weight as DEFINES to avoid hub inflation.",
		SemanticWeight: 0.15,
		Direction:      "directed",
		Domain:         DomainDocs,
		Synthetic:      true,
	},
	{
		Name:           EdgeDefines,
		Description:    "Source file defines a code entity. Direction: file node → entity node. Lowest weight — every entity has exactly one DEFINES edge, so including it at higher weight would uniformly equalise all siblings in a file.",
		SemanticWeight: 0.15,
		Direction:      "directed",
		Domain:         DomainCode,
	},
}

EdgeTypeCatalog is the authoritative registry of all edge types in the graph. Every entry in DefaultEdgeWeights must have a corresponding descriptor here — the TestEdgeTypeCatalogCompleteness test enforces this invariant at test time.

Sprint 16 adds: DEPLOYS, CONSUMES, CONFIGURED_BY (code-to-infra/api), DOCUMENTS (docs-to-any-entity: code, infra, or API), MENTIONS (cross-domain name match). When new edge types are added, append a descriptor here AND add an entry to DefaultEdgeWeights.

View Source

// Pool is the global instance of the StringPool accessed by FlatGraph.
// It is created once, at package initialisation, via NewStringPool.
var Pool = NewStringPool()

Pool is the global instance of the StringPool accessed by FlatGraph.

Functions ¶

func CrossDomainCategory ¶

func CrossDomainCategory(et EdgeType) string

CrossDomainCategory returns the human-readable category for a cross-domain edge type.

func ExportDOT ¶

func ExportDOT(nodes []*Node, edges []*Edge, repoRoot string, includeMeta bool) string

ExportDOT serialises nodes and edges as a Graphviz DOT digraph. repoRoot is stripped from file paths for readability; pass "" to skip. includeMeta adds signature metadata to node labels when present.

func ExportGraphML ¶

func ExportGraphML(nodes []*Node, edges []*Edge, repoRoot string) string

ExportGraphML serialises nodes and edges as GraphML XML.

func ExportMermaid ¶

func ExportMermaid(nodes []*Node, edges []*Edge, repoRoot string, includeMeta bool) string

ExportMermaid serialises nodes and edges as a Mermaid LR flowchart.

func IntentCarveWeights ¶

func IntentCarveWeights(intent string) map[EdgeType]float64

IntentCarveWeights returns the pre-allocated edge weight map for the given intent. These maps are package-level vars — zero allocation at call time. Falls back to DefaultEdgeWeights for unknown intents.

func IntentDirectionBoost ¶

func IntentDirectionBoost(intent string) float64

IntentDirectionBoost returns the DirectionBoost value for the given intent. Positive = prefer callees, negative = prefer callers, 0 = balanced.

func IsCrossDomainEdge ¶

func IsCrossDomainEdge(et EdgeType) bool

IsCrossDomainEdge returns true for edge types that connect entities across knowledge domain boundaries (code ↔ infra ↔ api ↔ docs ↔ knowledge ↔ custom). Used by collectCrossDomainImpact for one-hop impact detection.

Note: BFS/PPR cross-domain decay is applied based on node.Domain comparison (currNode.Domain != neighNode.Domain), not on edge type. This function is not called in the BFS/PPR hot path — it classifies edge types for impact analysis.

func NodeTypeToUint8 ¶

func NodeTypeToUint8(nt NodeType) uint8

NodeTypeToUint8 maps a NodeType to its serialized uint8 value.

func Serialize ¶

func Serialize(fg *FlatGraph, w io.Writer) error

Serialize writes fg to w in zstd-compressed binary format, mirroring Deserialize. The format is suitable for fast cold-start graph reloads.

Types ¶

type APISurfaceInfo ¶

// APISurfaceInfo describes whether an entity is part of the public API and
// how many external consumers depend on it.
type APISurfaceInfo struct {
	// NOTE(review): presumably true when the entity is exported — confirm.
	Exported         bool   `json:"exported"`
	ExternalPackages int    `json:"external_packages"` // number of packages that import this entity's package
	BreakingRisk     string `json:"breaking_risk"`     // "high" (exported, many consumers), "medium" (exported, few), "low" (not exported)
}

APISurfaceInfo describes whether an entity is part of the public API and how many external consumers depend on it.

type BulkEdge ¶

// BulkEdge is a single edge for bulk insertion; FlatGraph.BulkAddEdges
// accepts a slice of them.
type BulkEdge struct {
	// From and To are the edge endpoints, expressed as node indices.
	From, To NodeIndex
	// Weight is the weight attached to the edge.
	Weight   float32
}

BulkEdge is a single edge for bulk insertion.

type CallSite ¶

// CallSite records an unresolved function call encountered during parsing.
// The resolver drains these after all files are parsed and creates CALLS
// edges.
type CallSite struct {
	CallerID   NodeID // node ID of the calling function/method
	CallerFile string // absolute path of the file containing the caller
	PkgAlias   string // "" for direct calls; "pkg" for pkg.Func() qualified calls
	FuncName   string // name of the function/method being called
}

CallSite records an unresolved function call encountered during parsing. The resolver drains these after all files are parsed and creates CALLS edges.

type CarveConfig ¶

// CarveConfig controls how an ego-subgraph is extracted for a query node.
//
// NOTE(review): the "default" values quoted on individual fields are
// presumably the ones returned by DefaultCarveConfig; a zero-value
// CarveConfig carries Go zero values instead (0, false, nil) — confirm before
// constructing one by hand.
type CarveConfig struct {
	// MaxDepth is the maximum number of hops from the root node.
	MaxDepth int
	// TokenBudget caps the approximate output size in tokens (1 token ≈ 4 chars).
	TokenBudget int
	// EdgeWeights overrides DefaultEdgeWeights. Nil means use defaults.
	EdgeWeights map[EdgeType]float64
	// DecayFactor is multiplied per hop: relevance = weight × (decay ^ hop).
	DecayFactor float64
	// MinRelevance drops any node whose relevance score falls below this threshold
	// before the token-budget cut is applied. Prevents low-signal siblings and
	// package-import nodes from crowding out actual dependencies.
	// See DefaultCarveConfig() for tuning guidance (BFS vs PPR interaction).
	MinRelevance float64
	// ExcludeTypes lists node types to omit from the response. These nodes are
	// still traversed during BFS (so edges through them are discovered) but are
	// never emitted to the caller. Defaults to {NodePackage, NodeFile} so that
	// stdlib imports and file hub-nodes do not waste the token budget.
	ExcludeTypes map[NodeType]bool
	// ExcludeTestFiles omits nodes whose source file ends in _test.go from the
	// output. The nodes are still BFS-traversed (so their edges are discovered)
	// but they are never emitted to the caller. Defaults to true so that test
	// functions do not crowd the related bucket for well-tested codebases.
	ExcludeTestFiles bool
	// DirectionBoost applies a relevance multiplier along the CALLS direction.
	// Positive: boosts outgoing (callee) edges — token-budget pruner prefers
	// what this node calls. Negative: boosts incoming (caller) edges — pruner
	// prefers what calls this node. 0 disables directional preference. Default: 0.2.
	DirectionBoost float64
	// IntentID is an optional cache-key discriminator for intent-specific configs.
	// When EdgeWeights are overridden per-intent, set this to the intent string
	// (e.g. "modify", "debug") so that intent-specific subgraphs are cached
	// separately and do not collide with the default or other intents.
	IntentID string
	// UsePPR switches the traversal engine from BFS to Personalized PageRank.
	// PPR captures multi-path importance: a node reached via N independent call
	// chains scores N× higher than a structurally equivalent single-path node.
	// BFS max-score heuristic cannot represent this. Default: false (BFS).
	// Validated by Sprint 13 #1 spike (diamond 4.69×, wide-fan 5.68× PPR boost).
	UsePPR bool
	// Alpha is the PPR teleport probability — the chance the random walk jumps
	// back to the root (personalized restart) at each step. Higher alpha means
	// tighter focus on root with shorter effective reach. Default: 0.15
	// (standard PageRank restart rate). Only used when UsePPR=true.
	// Values outside (0,1) are replaced with 0.15 (the text previously said
	// "clamped", but the replacement is the default, not the nearest bound).
	Alpha float64
	// EmbeddingLookup batch-fetches pre-normalized float32 embedding vectors for
	// a set of node IDs. Called once after BFS/PPR with all scored node IDs.
	// IDs with no stored embedding are omitted from the result map. Nil disables
	// semantic hybrid scoring (pure structural — backward-compatible default).
	EmbeddingLookup func(ids []NodeID) map[NodeID][]float32
	// HybridLambda controls the semantic blend weight applied after BFS/PPR:
	//   finalScore = (1-λ)×structural + λ×cosineSim(embed(root), embed(n))
	// Range [0, 1]. 0 = pure structural (default). Ignored when EmbeddingLookup
	// is nil or the root node has no stored embedding.
	// Recommended production value: 0.3 (70% structural, 30% semantic).
	HybridLambda float64
	// QualityScoreLookup returns per-entity context quality scores keyed by
	// node ID. Scores are the signed sum of signal_weight values from
	// outcome_signals (Sprint 15 #1/2). Positive = context was consistently
	// helpful; negative = context was repeatedly insufficient or abandoned.
	// Called once after BFS/PPR scoring with all surviving nodes.
	// Each QualityNode carries the ID, Name, and File so closures can convert
	// to entityWithPath format without re-acquiring the graph read lock (which
	// would deadlock — CarveEgoGraph already holds g.mu.RLock when calling this).
	// Nil disables quality-based re-ranking (backward-compatible default).
	QualityScoreLookup func(nodes []QualityNode) map[NodeID]float64
	// CrossDomainDecay is a multiplier applied to relevance when BFS/PPR crosses
	// a domain boundary (e.g., code→infra, code→api). Range (0, 1].
	// A value of 0.5 (default) means cross-domain neighbors score at half the
	// relevance of same-domain neighbors at the same structural distance.
	// This keeps same-domain code nodes higher in the ranking while still
	// surfacing cross-domain context at meaningfully lower relevance.
	// 0 disables the domain-boundary penalty (treats all edges equally).
	// Values ≥ 1 are clamped to 1.0 (no penalty — backward compatible).
	//
	// NOTE(review): the stated range (0, 1] excludes 0, yet 0 is documented
	// above as a "disabled" sentinel rather than as a multiplier of zero.
	// Read the range as applying to active multipliers only — confirm against
	// the traversal code.
	CrossDomainDecay float64
	// LearnedEdgeWeights contains per-specific-edge weight multipliers derived
	// from historical task outcomes (Sprint 15 #3). When traversing edge
	// (From→To, Type), the base edgeWeight is multiplied by this value.
	// A multiplier of 1.0 is neutral; >1.0 boosts the edge; <1.0 penalises it.
	// Cap: 2.0x boost, floor: 0.3x penalty. Nil disables learned-weight
	// adjustments (backward-compatible default).
	LearnedEdgeWeights map[EdgeWeightKey]float64
	// LearnedEdgeWeightsVersion is the store's monotonic write counter at the
	// time LearnedEdgeWeights was loaded. It is included in the subgraph cache
	// key so that cached subgraphs are automatically invalidated after any write
	// to the edge_learned_weights table — regardless of whether the map has the
	// same number of entries (len-based discrimination is not sufficient).
	LearnedEdgeWeightsVersion int64
}

CarveConfig controls how an ego-subgraph is extracted for a query node.

func DefaultCarveConfig ¶

func DefaultCarveConfig() CarveConfig

DefaultCarveConfig returns sensible defaults for context carving.

MinRelevance / PPR interaction:

BFS path: MinRelevance=0.01 prunes nodes whose relevance has decayed below 1% of root. With decay=0.5 and a 16K-edge graph this allows ~6 hops for narrow chains and ~3 hops for hub nodes (degree-normalized adaptive decay). Raising MinRelevance tightens the subgraph; lowering it risks hub explosion.
PPR path (UsePPR=true): power iteration assigns near-zero scores to distant nodes naturally — MinRelevance=0.01 trims the long tail without aggressive pruning. The spike benchmark (ppr_spike_test.go) validated this threshold against diamond and wide-fan graph topologies. Lowering below 0.001 has negligible recall gain with O(N) cost. Raising above 0.05 risks losing semantically adjacent nodes in sparse subgraphs.

Recommended tuning guide:

Default (0.01) — correct for most codebases up to ~50K nodes.
Dense monorepos (>100K edges): raise to 0.03–0.05 to keep carves fast.
Sparse/small repos (<1K nodes): lower to 0.005 to improve recall depth.

type CarvedNode ¶

// CarvedNode is a node annotated with its relevance score and hop distance
// from the query root, as computed during a carving traversal.
type CarvedNode struct {
	// Node is the graph node being annotated.
	Node      *Node   `json:"node"`
	// Relevance is the score assigned to the node by the traversal.
	Relevance float64 `json:"relevance"`
	// Hop is the node's distance from the query root, in hops.
	Hop       int     `json:"hop"`
}

CarvedNode is a node annotated with its relevance score and hop distance from the query root, as computed during a carving traversal.

type CrossDomainContext ¶

// CrossDomainContext groups cross-domain CarvedNodes from a BFS/PPR subgraph
// by their relationship to the root entity. Each sub-slice keeps the nodes'
// Relevance scores so they can be ranked within the bucket.
//
// Nodes connected via a direct edge from/to root are categorized by edge
// type; multi-hop cross-domain nodes with no direct root edge go into Related.
// Every field is tagged omitempty, so empty buckets are left out of the JSON.
type CrossDomainContext struct {
	Deploys      []CarvedNode `json:"deploys,omitempty"`
	Consumes     []CarvedNode `json:"consumes,omitempty"`
	ConfiguredBy []CarvedNode `json:"configured_by,omitempty"`
	DocumentedIn []CarvedNode `json:"documented_in,omitempty"`
	Mentions     []CarvedNode `json:"mentions,omitempty"`
	Manual       []CarvedNode `json:"manual,omitempty"`
	Related      []CarvedNode `json:"related,omitempty"` // multi-hop or no direct edge from root
}

CrossDomainContext groups cross-domain CarvedNodes from a BFS/PPR subgraph by their relationship to the root entity. Used by directionalContext in get_context responses. Each sub-slice preserves BFS Relevance scores for ranking within the sub-bucket.

Nodes connected via a direct edge from/to root are categorized by edge type. Multi-hop cross-domain nodes with no direct root edge go into Related.

func (*CrossDomainContext) IsEmpty ¶

func (c *CrossDomainContext) IsEmpty() bool

IsEmpty returns true when all sub-buckets are empty.

type CrossDomainRef ¶

// CrossDomainRef is a single entity reached via a cross-domain edge during
// impact analysis. Category groups the finding by relationship type so agents
// can answer questions such as "what Terraform resources does this deploy to?".
type CrossDomainRef struct {
	// EntityRef is embedded, so its fields are promoted onto CrossDomainRef.
	EntityRef
	// EdgeType is the cross-domain edge type that led to this entity
	// (e.g. "DEPLOYS", "CONSUMES", "CONFIGURED_BY", "DOCUMENTS", "MENTIONS", "MANUAL").
	EdgeType EdgeType `json:"edge_type"`
	// Category is a human-readable grouping derived from EdgeType:
	// "infra" (DEPLOYS), "api" (CONSUMES), "config" (CONFIGURED_BY),
	// "docs" (DOCUMENTS), "related" (MENTIONS/MANUAL).
	Category string `json:"category"`
}

CrossDomainRef is a single entity reached via a cross-domain edge during impact analysis. Category groups the finding by relationship type so agents can answer "what Terraform resources does this deploy to?" etc.

type DomainType ¶

// DomainType classifies which knowledge domain a graph node belongs to.
// Code is the default domain; other parsers and connectors set other domains
// so that infrastructure, API, doc, and issue nodes can coexist in one graph.
type DomainType string

DomainType classifies which knowledge domain a graph node belongs to. Code is the default domain; future parsers and connectors set other domains so that infrastructure, API, doc, and issue nodes can coexist in the same graph.

// Knowledge domains a graph node can belong to. The string values also
// appear as the Domain of edge-type descriptors in EdgeTypeCatalog.
const (
	// DomainCode is the default: source-code entities (functions, structs, etc.).
	DomainCode DomainType = "code"
	// DomainInfra represents infrastructure resources (Terraform, k8s, Docker).
	DomainInfra DomainType = "infra"
	// DomainAPI represents API schema entities (OpenAPI endpoints, gRPC services).
	DomainAPI DomainType = "api"
	// DomainDocs represents documentation sections (Markdown, wikis).
	DomainDocs DomainType = "docs"
	// DomainIssues represents external tickets and issues (GitHub, Linear, Jira).
	DomainIssues DomainType = "issues"
	// DomainCustom is a catch-all for user-defined domain parsers and connectors.
	DomainCustom DomainType = "custom"
	// DomainKnowledge represents cross-domain or meta-level relationships.
	// Used by synthetic edges (e.g. MENTIONS) that bridge two existing-domain entities
	// rather than belonging to any single domain. Sprint 16.
	DomainKnowledge DomainType = "knowledge"
)

type Edge ¶

// Edge represents a directed relationship between two nodes.
type Edge struct {
	// From is the ID of the node the edge starts at.
	From NodeID   `json:"from"`
	// To is the ID of the node the edge points to.
	To   NodeID   `json:"to"`
	// Type classifies the relationship (see the EdgeType constants).
	Type EdgeType `json:"type"`
}

Edge represents a directed relationship between two nodes.

type EdgeIndex ¶

// EdgeIndex represents a directed adjacency connection, stored as a uint32.
// NOTE(review): presumably an index into FlatGraph's edge storage — confirm.
type EdgeIndex uint32

EdgeIndex represents a directed adjacency connection.

type EdgeType ¶

// EdgeType classifies the relationship between two nodes. Values are
// upper-case strings such as "CALLS" or "IMPORTS".
type EdgeType string

EdgeType classifies the relationship between two nodes.

const (
	// Core edge types. They carry no individual documentation here; what the
	// rest of this package's docs state about them:
	//   - IMPORTS edges run file node → package node (see SnapshotImportAdjacency).
	//   - CALLS and IMPLEMENTS are the edges ImpactAnalysis follows in reverse.
	//   - DEPENDS_ON is also produced by cross-file Terraform reference
	//     resolution (see AddTerraformRef).
	EdgeImports    EdgeType = "IMPORTS"
	EdgeCalls      EdgeType = "CALLS"
	EdgeImplements EdgeType = "IMPLEMENTS"
	EdgeDefines    EdgeType = "DEFINES"
	EdgeEmbeds     EdgeType = "EMBEDS"
	EdgeDependsOn  EdgeType = "DEPENDS_ON"
	EdgeExports    EdgeType = "EXPORTS"
	EdgeDataFlows  EdgeType = "DATA_FLOWS"
	// EdgeHandles is a synthetic edge injected by the heuristic pass (R1).
	// Direction: routeNode --HANDLES--> handlerFunction.
	// Represents framework routing registration: "this route dispatches to this handler."
	// Confidence is stored in the route node's metadata (key "confidence").
	EdgeHandles EdgeType = "HANDLES"
	// R31: Documentation graph edges.
	// EdgeContains links a document file to its section nodes (doc→section)
	// and parent sections to child subsections (section→subsection).
	EdgeContains EdgeType = "CONTAINS"
	// EdgeExplains links a documentation section to a code entity it describes.
	// Direction: Section → code entity. Created by ResolveDocEdges post-parse.
	EdgeExplains EdgeType = "EXPLAINS"
	// EdgeDocumentedBy is the reverse of EXPLAINS: code entity → Section.
	// Enables get_context to surface documentation for any queried code entity.
	EdgeDocumentedBy EdgeType = "DOCUMENTED_BY"
	// EdgeLinksTo connects document nodes via markdown [text](path.md) links.
	// Direction: source document/section → target document node.
	EdgeLinksTo EdgeType = "LINKS_TO"
	// EdgeManual is a user-defined relationship created via link_entities.
	// Used when the relation string doesn't match a known catalog type.
	// BFS weight 0.5 — traversed but lower priority than structural code edges.
	EdgeManual EdgeType = "MANUAL"

	// Cross-domain edges (DEPLOYS, CONSUMES, CONFIGURED_BY, DOCUMENTS, MENTIONS),
	// added in Sprint 16 according to the EdgeTypeDescriptor.Domain documentation.

	// EdgeDeploys links a code entity to the infrastructure resource that deploys it.
	// Direction: code entity → Terraform/k8s resource.
	EdgeDeploys EdgeType = "DEPLOYS"
	// EdgeConsumes links a code entity to the API endpoint or service it calls.
	// Direction: code entity → OpenAPI endpoint / gRPC service node.
	EdgeConsumes EdgeType = "CONSUMES"
	// EdgeConfiguredBy links a code entity to the config resource that controls it.
	// Direction: code entity → config resource (Terraform variable, k8s ConfigMap, etc.).
	EdgeConfiguredBy EdgeType = "CONFIGURED_BY"
	// EdgeDocuments links a documentation section to the code entity it describes.
	// Direction: docs section → code entity. Broader than EXPLAINS — used for
	// cross-domain docs (e.g. a README section about a Terraform module).
	EdgeDocuments EdgeType = "DOCUMENTS"
	// EdgeMentions is a synthetic cross-domain name-match edge.
	// Direction: any entity → any entity (cross-domain). Created by the name-matching
	// background pass (Sprint 16 #2) when two entities share the same name across domains.
	// Confidence 0.0–1.0 stored in edge metadata; only edges with confidence ≥ 0.6 are
	// auto-created. BFS weight is lower than structural edges to reflect uncertainty.
	// NOTE(review): the Edge struct shown in these docs has no metadata field —
	// confirm where this confidence value is actually stored.
	EdgeMentions EdgeType = "MENTIONS"

	// EdgeContradicts links two entities that express conflicting information
	// (e.g. two doc sections making incompatible claims about a system).
	// Direction: newer/conflicting entity → established entity.
	EdgeContradicts EdgeType = "CONTRADICTS"

	// EdgeCausedBy links an effect entity to its cause
	// (e.g. "OutOfMemoryError" caused_by "LeakedConnection").
	// Direction: effect → cause.
	EdgeCausedBy EdgeType = "CAUSED_BY"

	// EdgeInstanceOf links a specific entity to its general type or category
	// (e.g. "Redis" instance_of "CacheSystem").
	// Direction: specific → general.
	EdgeInstanceOf EdgeType = "INSTANCE_OF"

	// EdgeRelatesTo is the generic fallback relationship for NL-extracted edges
	// where no more specific type applies. Used when Tier 2 classification is
	// unavailable or returns an unrecognized type.
	// Direction: source entity → related entity.
	EdgeRelatesTo EdgeType = "RELATES_TO"
)

Edge type constants: classify the relationship between two graph nodes.

type EdgeTypeDescriptor ¶

// EdgeTypeDescriptor is one entry of the edge-type catalog; GetEdgeTypes
// returns a copy of the full catalog as a slice of these.
type EdgeTypeDescriptor struct {
	// Name is the EdgeType constant value (e.g. "CALLS").
	Name EdgeType `json:"name"`
	// Description is a human-readable explanation of what this edge means.
	Description string `json:"description"`
	// SemanticWeight is the default BFS traversal weight (matches DefaultEdgeWeights).
	// Higher weight = edge traversed first and contributes more relevance to reachable nodes.
	SemanticWeight float64 `json:"semantic_weight"`
	// Direction is always "directed" for the current graph model.
	// Reserved for future bidirectional edge types (e.g. cross-domain MENTIONS).
	Direction string `json:"direction"`
	// Domain classifies which knowledge domain this edge belongs to.
	// Uses the same values as DomainType constants: DomainCode, DomainDocs,
	// DomainInfra, DomainAPI, DomainKnowledge, DomainIssues, DomainCustom.
	// Sprint 16 added infra, api, and knowledge domain edges (DEPLOYS, CONSUMES,
	// CONFIGURED_BY, DOCUMENTS, MENTIONS).
	Domain DomainType `json:"domain"`
	// Synthetic marks edges injected by heuristic passes rather than derived from the AST.
	// Synthetic edges carry an inherent confidence < 1.0 (stored in node metadata).
	// The field is omitted from JSON output when false (omitempty).
	Synthetic bool `json:"synthetic,omitempty"`
}

EdgeTypeDescriptor captures the semantic metadata for a single edge type. The catalog is the authoritative source for BFS weights, domain tags, and human-readable descriptions — avoiding the need to scatter this information across multiple maps and comments throughout the codebase.

func GetEdgeTypes ¶

func GetEdgeTypes() []EdgeTypeDescriptor

GetEdgeTypes returns a copy of the EdgeTypeCatalog slice. Callers may safely range over or index the result without mutating shared state.

type EdgeWeightKey ¶

type EdgeWeightKey struct {
	From NodeID   // source node of the identified edge
	To   NodeID   // target node of the identified edge
	Type EdgeType // relationship kind of the identified edge
}

EdgeWeightKey uniquely identifies a specific directed edge in the graph. Used as a map key for per-edge learned weight multipliers (Sprint 15 #3).

type EntityInfo ¶

type EntityInfo struct {
	// EntityRef is embedded, so its fields (ID, Name, Type, File, Line) are
	// promoted onto EntityInfo, including in the JSON encoding.
	EntityRef
	Fanin  int `json:"fanin"`  // incoming-edge count; presumably as reported by Graph.Fanin — confirm
	Fanout int `json:"fanout"` // outgoing-edge count; presumably as reported by Graph.Fanout — confirm
}

EntityInfo extends EntityRef with connectivity metrics.

type EntityRef ¶

type EntityRef struct {
	ID   NodeID   `json:"id"`   // ID of the referenced node
	Name string   `json:"name"` // name of the referenced entity
	Type NodeType `json:"type"` // node type of the referenced entity
	File string   `json:"file"` // source file; NOTE(review): absolute vs. repo-relative is not visible here
	Line int      `json:"line"` // line number within File
}

EntityRef is a minimal reference to a node, used for lists like entry points.

type ErrNodeNotFound ¶

// ErrNodeNotFound has NodeID as its underlying type and satisfies the error
// interface through its Error method.
// NOTE(review): presumably the value is the ID that was looked up — confirm.
type ErrNodeNotFound NodeID

ErrNodeNotFound is returned when a query targets a non-existent node.

func (ErrNodeNotFound) Error ¶

func (e ErrNodeNotFound) Error() string

type FlatGraph ¶

type FlatGraph struct {
	// RepoID identifies the repository this graph belongs to
	// (cf. the repoID argument of NewFlatGraph).
	RepoID string

	// Names holds compact IDs into the String Interning Pool.
	// Names, Types, FileIDs, NamespaceIDs and Tombstones are the per-node
	// columns of the Struct-of-Arrays layout; AddNode appends a node and
	// returns its NodeIndex.
	Names []StringID

	// Types holds the NodeType enum directly.
	Types []NodeType

	// FileIDs maps each node to the source file string ID.
	FileIDs []StringID

	// NamespaceIDs enables multi-monolith cross-linking without changing core logic.
	NamespaceIDs []uint16

	// Tombstones is a bitset-like array. If true, the node was deleted during
	// an incremental file parse. Array compaction happens in the background.
	Tombstones []bool

	// TombstoneCount tracks how many nodes are deleted. When >15%, compaction is triggered.
	TombstoneCount int

	// OutEdges stores the destination NodeIndex of all outgoing edges contiguously.
	OutEdges []NodeIndex
	// OutWeights stores the Semantic EdgeWeight for each corresponding edge in OutEdges.
	OutWeights []float32
	// OutOffsets denotes the starts and ends of a specific node's edges in the OutEdges slice.
	// Node `i`'s edges are in OutEdges[OutOffsets[i] : OutOffsets[i+1]]
	// (which implies one more offset than there are nodes).
	OutOffsets []uint64

	// InEdges (Incoming edges) built identically to OutEdges for reverse lookups.
	// InWeights and InOffsets mirror OutWeights and OutOffsets for the
	// incoming direction.
	InEdges   []NodeIndex
	InWeights []float32
	InOffsets []uint64
	// contains filtered or unexported fields
}

FlatGraph is the V2 "Deterministic Core" engine. It uses a Struct-of-Arrays (SoA) layout instead of pointer-heavy maps. This keeps each column in contiguous memory, maximizing CPU cache locality for BFS traversals and preventing GC pauses during million-node loads.

func Deserialize ¶

func Deserialize(r io.Reader) (*FlatGraph, error)

Deserialize reads the zstd BLOB and reconstructs the global FlatGraph.

func NewFlatGraph ¶

func NewFlatGraph(repoID string) *FlatGraph

NewFlatGraph initializes an empty SoA Graph structure.

func (*FlatGraph) AddNode ¶

func (fg *FlatGraph) AddNode(name StringID, nodeType NodeType, fileID StringID, nsID uint16) NodeIndex

AddNode appends a new node into the SoA structure.

func (*FlatGraph) BulkAddEdges ¶

func (fg *FlatGraph) BulkAddEdges(edges []BulkEdge) int

BulkAddEdges rebuilds CSR arrays from scratch given a sorted list of edges. This is O(E + N) instead of O(E * N) for AddEdge called E times.

func (*FlatGraph) ExtID ¶

func (fg *FlatGraph) ExtID(idx NodeIndex) NodeID

ExtID maps our compact NodeIndex back to the stable string-based NodeID required by the MCP protocol communication.

func (*FlatGraph) LookupIndex ¶

func (fg *FlatGraph) LookupIndex(id NodeID) (NodeIndex, bool)

LookupIndex returns the NodeIndex for a NodeID, or (0, false) if not found.

func (*FlatGraph) Neighbors ¶

func (fg *FlatGraph) Neighbors(idx NodeIndex) []NodeIndex

Neighbors returns the NodeIndex values for all undirected (out+in) neighbors of the given node index. Used by the PPR BFS fast path.

func (*FlatGraph) NodeIDAt ¶

func (fg *FlatGraph) NodeIDAt(idx NodeIndex) NodeID

NodeIDAt returns the original graph NodeID for a NodeIndex, or "" if out of range. Uses the nodeIDs slice populated by Graph.EnableFlatGraph.

type Graph ¶

// Graph exposes no exported fields; create one with New and interact with it
// through its methods.
type Graph struct {
	// contains filtered or unexported fields
}

Graph is the core in-memory code graph. It is safe for concurrent reads and writes — a RWMutex serialises mutations while allowing parallel queries.

func New ¶

func New(repoID string) *Graph

New creates an empty Graph for the given repository identifier.

func (*Graph) AddCallSite ¶

func (g *Graph) AddCallSite(cs CallSite)

AddCallSite records an unresolved call site for post-parse resolution.

func (*Graph) AddEdge ¶

func (g *Graph) AddEdge(e *Edge)

AddEdge inserts a directed edge. Both endpoint nodes must already exist; if either is absent the edge is silently dropped to avoid dangling refs. Duplicate edges (same From, To, Type) are silently dropped so that repeated calls from incremental reindex or heuristic passes are idempotent.

func (*Graph) AddImportAlias ¶

func (g *Graph) AddImportAlias(file, alias, importPath string)

AddImportAlias records that file uses alias as the identifier for importPath. Called by the Go parser when an explicit import alias is present (e.g., `import alias "github.com/foo/bar"`).

func (*Graph) AddInstantiatedType ¶

func (g *Graph) AddInstantiatedType(file, typeName string)

AddInstantiatedType records that typeName is explicitly constructed in file. Called by language parsers when they encounter constructor expressions (Java: new Foo(), TypeScript: new Foo()). Used by the resolver for RTA-style call graph refinement.

func (*Graph) AddNode ¶

func (g *Graph) AddNode(n *Node)

AddNode inserts or replaces a node. If a node with the same ID already exists it is overwritten — the caller is responsible for deduplication. A stable UUID is generated for n.StableID if it is empty.

func (*Graph) AddTerraformRef ¶

func (g *Graph) AddTerraformRef(ref TerraformRef)

AddTerraformRef records an unresolved Terraform resource reference for post-parse cross-file DEPENDS_ON resolution.

func (*Graph) AddVarType ¶

func (g *Graph) AddVarType(file, varName, typeName string)

AddVarType records that variable varName in file has type typeName. Called by language parsers during AST traversal to enable cross-file obj.method() resolution in the post-parse resolver pass.

func (*Graph) AllEdges ¶

func (g *Graph) AllEdges() []*Edge

AllEdges returns a snapshot of every edge in the graph, sorted by From, To, Type.

func (*Graph) AllNodes ¶

func (g *Graph) AllNodes() []*Node

AllNodes returns a snapshot of every node in the graph.

func (*Graph) BulkAddCallSites ¶

func (g *Graph) BulkAddCallSites(sites []CallSite)

BulkAddCallSites appends multiple call sites in a single lock acquisition. Used by the watcher to re-register stored call sites from other files before a resolver pass so that ResolveCallEdges can recreate CALLS edges pointing into a file that was just re-parsed (those edges were deleted by RemoveFile).

func (*Graph) CacheLen ¶

func (g *Graph) CacheLen() int

CacheLen returns the number of entries in the subgraph cache (P9-8).

func (*Graph) CarveEgoGraph ¶

func (g *Graph) CarveEgoGraph(rootID NodeID, cfg CarveConfig) (*SubGraph, error)

CarveEgoGraph extracts a relevance-ranked subgraph centred on the given root node.

When cfg.UsePPR is false (default), the algorithm is:

1. BFS outward from root, up to cfg.MaxDepth hops.
2. Each node is assigned a relevance score: relevance = edgeTypeWeight(edge) × (cfg.DecayFactor ^ hopCount)
3. When a node is reachable via multiple paths the maximum score is kept.
4. If the estimated token cost exceeds cfg.TokenBudget, the lowest-scored nodes are pruned (highest-hop, lowest-weight first).
5. Only edges where both endpoints survived pruning are included.

When cfg.UsePPR is true, steps 1-3 are replaced by Personalized PageRank (see pprScores). Steps 4-5 are identical. PPR captures multi-path importance that the BFS max-score heuristic cannot represent.

func (*Graph) ClearFileSnapshot ¶

func (g *Graph) ClearFileSnapshot(file string)

ClearFileSnapshot removes the stable ID snapshot for a file once migration is complete. Optional — snapshots are small and automatically replaced on the next SnapshotFileStableIDs call for the same file.

func (*Graph) Compact ¶

func (g *Graph) Compact()

Compact recreates the internal maps from scratch, allowing the Go runtime to release memory from deleted map buckets. Go maps do not shrink after deletions, so after thousands of incremental re-parses memory trends upward. Call this periodically (e.g. after a full reindex) to reclaim that memory.

func (*Graph) CrossDomainImpactForNode ¶

func (g *Graph) CrossDomainImpactForNode(nodeID NodeID) ([]CrossDomainRef, bool)

CrossDomainImpactForNode returns the cross-domain entities directly reachable from nodeID via cross-domain edges. It is the public entry point used by the struct/interface aggregation path in handleGetImpact. Returns the refs and a truncated flag (true when capped at maxCrossDomainImpactNodes).

func (*Graph) CrossRepoCalls ¶

func (g *Graph) CrossRepoCalls(primaryRepoID string) (crossCallCount int, linkedRepos []string)

CrossRepoCalls returns statistics about cross-repository CALLS edges. It iterates the internal edge map directly without allocating a snapshot slice. The returned linkedRepos slice is sorted and excludes primaryRepoID.

func (*Graph) DirectNeighbors ¶

func (g *Graph) DirectNeighbors(id NodeID) []NodeID

DirectNeighbors returns deduplicated NodeIDs that are 1-hop away from id via any edge direction. Uses the FlatGraph fast path when available, otherwise falls back to OutEdges + InEdges. Returns nil if id is unknown.

func (*Graph) DrainCallSites ¶

func (g *Graph) DrainCallSites() []CallSite

DrainCallSites returns all pending call sites and clears the internal list. Called by the resolver after all files have been parsed.

func (*Graph) DrainTerraformRefs ¶

func (g *Graph) DrainTerraformRefs() []TerraformRef

DrainTerraformRefs returns all pending Terraform refs and clears the list. Must be called after all .tf files have been parsed.

func (*Graph) EdgeCount ¶

func (g *Graph) EdgeCount() int

EdgeCount returns the total number of edges.

func (*Graph) EdgeCountsByType ¶

func (g *Graph) EdgeCountsByType() map[EdgeType]int

EdgeCountsByType returns the number of edges per edge type.

func (*Graph) EdgesForFile ¶

func (g *Graph) EdgesForFile(file string) []*Edge

EdgesForFile returns all edges where at least one endpoint belongs to the given file: outgoing edges from file nodes and incoming edges to file nodes. Self-edges within the same file appear exactly once. Complexity is O(total_nodes + file_edges) — significantly cheaper than AllEdges() + filter (O(E)) for per-file violation and analysis passes.

func (*Graph) EnableFlatGraph ¶

func (g *Graph) EnableFlatGraph()

EnableFlatGraph builds the FlatGraph from the current graph state and stores it for use as the PPR BFS fast path. Safe to call at any time; rebuilds atomically. The heavy construction runs outside g.mu; only the final pointer swap requires a write lock. Idempotent — calling multiple times is safe.

func (*Graph) Fanin ¶

func (g *Graph) Fanin(id NodeID) int

Fanin returns the number of incoming edges to the given node.

func (*Graph) Fanout ¶

func (g *Graph) Fanout(id NodeID) int

Fanout returns the number of outgoing edges from the given node.

func (*Graph) FindByFile ¶

func (g *Graph) FindByFile(filePath string) []*Node

FindByFile returns all nodes whose File field matches the given path. The match is suffix-based so callers may pass either a full absolute path or a relative path such as "internal/graph/graph.go"; both resolve correctly against the absolute paths that the parser stores on each node.

Uses the secondary index for O(1) lookup when available; falls back to O(N) scan during initial parsing when the index is not yet ready.

func (*Graph) FindByName ¶

func (g *Graph) FindByName(name string) []*Node

FindByName returns all nodes whose Name field matches the given string (case-insensitive). Also matches qualified names: searching "Close" will match a node named "Store.Close" (suffix after the last dot). An empty slice is returned if nothing matches.

Uses the secondary index for O(1) lookup when available; falls back to O(N) scan during initial parsing when the index is not yet ready.

func (*Graph) FindByPattern ¶

func (g *Graph) FindByPattern(pattern string) []*Node

FindByPattern returns all nodes whose Name contains the given substring (case-insensitive). Useful for fuzzy "find entity" queries. On large graphs (100K+ nodes), consider FindByPatternLimit to cap the scan.

func (*Graph) FindByPatternLimit ¶

func (g *Graph) FindByPatternLimit(pattern string, limit int) []*Node

FindByPatternLimit is like FindByPattern but stops scanning after limit matches are found (0 = unlimited). This prevents O(N) full scans on hot paths where only a few results are needed. Results are sorted by ID for deterministic output.

func (*Graph) FindByType ¶

func (g *Graph) FindByType(t NodeType) []*Node

FindByType returns all nodes of the given NodeType.

func (*Graph) FindTestsFor ¶

func (g *Graph) FindTestsFor(nodeID NodeID) []string

FindTestsFor returns the files of test nodes that call into the given node, found via reverse-BFS over CALLS edges limited to test files. The result is a deduplicated sorted list of test file paths. Returns an empty slice when no test coverage is found.

func (*Graph) FindTestsWithDistance ¶

func (g *Graph) FindTestsWithDistance(nodeID NodeID) []TestRef

FindTestsWithDistance is like FindTestsFor but returns distance-scored results. Tests at distance 1 (directly call the entity) are "critical" — most likely to break. Distance 2 is "likely", distance 3+ is "peripheral".

func (*Graph) GetImportAliases ¶

func (g *Graph) GetImportAliases(file string) map[string]string

GetImportAliases returns the alias → importPath map for the given file. Returns nil if no explicit import aliases were recorded for the file.

func (*Graph) GetInstantiatedTypes ¶

func (g *Graph) GetInstantiatedTypes() map[string]bool

GetInstantiatedTypes returns the union of all instantiated type names across all files. Returns nil if no instantiation data was recorded (e.g. pure Go projects where constructor tracking is not implemented).

func (*Graph) GetNode ¶

func (g *Graph) GetNode(id NodeID) *Node

GetNode returns the node for a given ID, or nil if absent.

func (*Graph) GetVarTypes ¶

func (g *Graph) GetVarTypes(file string) map[string]string

GetVarTypes returns the variable → type map for the given file. Returns nil if no type annotations were recorded for the file.

func (*Graph) HasEdge ¶

func (g *Graph) HasEdge(from, to NodeID, edgeType EdgeType) bool

HasEdge reports whether an edge (from, to, edgeType) exists in the graph. O(1).

func (*Graph) ImpactAnalysis ¶

func (g *Graph) ImpactAnalysis(rootID NodeID, maxDepth int) (*ImpactResult, error)

ImpactAnalysis performs a reverse BFS from rootID following incoming CALLS and IMPLEMENTS edges to find all nodes that could be affected if rootID changes. Results are grouped into depth tiers: direct (depth 1), indirect (depth 2), peripheral (depth 3+). maxDepth caps the traversal (0 uses default of 3).

func (*Graph) InEdges ¶

func (g *Graph) InEdges(id NodeID) []*Edge

InEdges returns a copy of all edges arriving at the given node.

func (*Graph) InEdgesForFile ¶

func (g *Graph) InEdgesForFile(file string) []*Edge

InEdgesForFile returns all incoming edges to nodes whose File matches the given path. Complexity is O(total_nodes + file_in_edges). Paired with OutEdgesForFile to get all edges that touch a file without a full AllEdges() scan — used by CheckViolationsForFile for O(file_edges) violation checks.

func (*Graph) Index ¶

func (g *Graph) Index() *GraphIndex

Index returns the current columnar GraphIndex, or nil if it has not been built yet. Callers should check Index().Ready() before using it.

func (*Graph) InvalidateCache ¶

func (g *Graph) InvalidateCache()

InvalidateCache discards all cached subgraph results. Call this after any batch of graph mutations (e.g. after the watcher re-parses a file) so that subsequent get_context calls see fresh data.

func (*Graph) InvalidateCacheForFile ¶

func (g *Graph) InvalidateCacheForFile(file string)

InvalidateCacheForFile evicts only cached subgraphs that reference the given file. Entries for unrelated entities survive, dramatically improving cache hit rates when a single file changes. Prefer this over InvalidateCache when you know which file was modified.

func (*Graph) MakeNodeID ¶

func (g *Graph) MakeNodeID(file, name string) NodeID

MakeNodeID constructs a canonical NodeID from its components. Format: "repoID::file::name"

When the graph has a repo root set, file paths are stored as project-relative paths (e.g. "cmd/synapses/main.go" instead of "/Users/you/.../main.go"). This significantly reduces token consumption in MCP responses. Node.File retains the absolute path for internal operations like RemoveFile.

func (*Graph) MergeFrom ¶

func (g *Graph) MergeFrom(other *Graph)

MergeFrom copies all nodes, edges, varTypes, and instantiatedTypes from other into g. Existing nodes in g are never overwritten — other's data is purely additive. This is used at startup to merge federated (linked) project graphs and by the watcher to merge parallel-parsed temp graphs back into the main graph.

func (*Graph) MigrateStableID ¶

func (g *Graph) MigrateStableID(n *Node)

MigrateStableID attempts to reuse a stable UUID from a previous snapshot for the given node. It checks snapshots for the node's file in two tiers:

Tier 1: exact (name, pkg, signature) match → certain same entity
Tier 2: same (pkg, signature) with different name → likely rename

If no match is found, the node's current StableID is left unchanged. Must be called AFTER re-parsing and AddNode, but before SnapshotFileStableIDs is called again for the same file.

func (*Graph) NodeCount ¶

func (g *Graph) NodeCount() int

NodeCount returns the total number of nodes.

func (*Graph) NodeCountsByDomain ¶

func (g *Graph) NodeCountsByDomain() map[DomainType]int

NodeCountsByDomain returns a map of domain → node count for all nodes in the graph. Empty domain (code entities parsed before Sprint 16) is counted under DomainCode. This is O(N) under one read lock and does not copy node pointers — use it in hot paths like session_init instead of AllNodes().

func (*Graph) NodesForFile ¶

func (g *Graph) NodesForFile(file string) []*Node

NodesForFile returns all nodes whose source file matches the given path. Used by the watcher to migrate stable IDs after a re-parse.

func (*Graph) OutEdges ¶

func (g *Graph) OutEdges(id NodeID) []*Edge

OutEdges returns a copy of all edges leaving the given node.

func (*Graph) OutEdgesForFile ¶

func (g *Graph) OutEdgesForFile(file string) []*Edge

OutEdgesForFile returns all outgoing edges from nodes whose File matches the given path. Complexity is O(total_nodes + file_out_edges), which is significantly cheaper than AllEdges() + filter when only one file changed.

func (*Graph) PeekCallSites ¶

func (g *Graph) PeekCallSites() []CallSite

PeekCallSites returns a copy of all pending call sites without clearing them. Used to persist call sites to the store before the resolver drains them.

func (*Graph) ProjectIdentity ¶

func (g *Graph) ProjectIdentity() *ProjectIdentity

ProjectIdentity computes a compact architectural summary of the graph. This is the payload returned by the get_project_identity MCP tool. Results are cached for 30 seconds and invalidated on graph mutations.

func (*Graph) RebuildIndex ¶

func (g *Graph) RebuildIndex() ([]byte, error)

RebuildIndex builds a fresh columnar GraphIndex from the current map state and atomically replaces g.index. Only one rebuild runs at a time. It returns the zstd-compressed snapshot bytes for the caller to persist to the store for fast warm-boot loading. Returns nil bytes on serialisation error. Typical usage:

go func() {
    blob, err := g.RebuildIndex()
    if err == nil { st.SaveIndexSnapshot(blob) }
}()

func (*Graph) RemoveCallSitesForFile ¶

func (g *Graph) RemoveCallSitesForFile(file string)

RemoveCallSitesForFile removes any pending call sites whose CallerFile matches the given path. Called by the watcher before re-parsing a changed file so that stale call sites from the old version are not mixed with the newly parsed ones.

func (*Graph) RemoveEdge ¶

func (g *Graph) RemoveEdge(from, to NodeID, edgeType EdgeType)

RemoveEdge removes a single directed edge. No-op if the edge does not exist. O(outDegree + inDegree) due to slice filtering — acceptable for manual edge removals which are rare and not on any hot path.

func (*Graph) RemoveFile ¶

func (g *Graph) RemoveFile(file string)

RemoveFile removes all nodes and their associated edges for a given file path. Used by the file watcher to prune stale data before re-parsing.

func (*Graph) RemoveTerraformRefsForFile ¶

func (g *Graph) RemoveTerraformRefsForFile(file string)

RemoveTerraformRefsForFile removes pending Terraform refs whose FromFile matches the given path. Called by the watcher before re-parsing a .tf file.

func (*Graph) RepoID ¶

func (g *Graph) RepoID() string

RepoID returns the repository identifier this graph was built for.

func (*Graph) Root ¶

func (g *Graph) Root() string

Root returns the absolute filesystem path of the repository root. It is empty if the graph was loaded from a store that predates this field.

func (*Graph) SetFileProvenance ¶

func (g *Graph) SetFileProvenance(filePath string, p ProvenanceType)

SetFileProvenance sets the Provenance field on all nodes whose File matches filePath. Runs under a write lock to avoid the data race that would occur if callers mutated node pointers returned by FindByFile after releasing the lock.

func (*Graph) SetIndex ¶

func (g *Graph) SetIndex(idx *GraphIndex)

SetIndex atomically replaces the graph's columnar index with the provided one. Used during warm-boot to install a snapshot-loaded index without a full rebuild. Also sets g.pool to idx.Pool so subsequent RebuildIndex calls share the same pool.

func (*Graph) SetRoot ¶

func (g *Graph) SetRoot(root string)

SetRoot stores the absolute path of the repository root.

func (*Graph) SnapshotCallsAdjacency ¶

func (g *Graph) SnapshotCallsAdjacency() map[NodeID][]NodeID

SnapshotCallsAdjacency returns a snapshot of CALLS outgoing edges for all nodes.

func (*Graph) SnapshotEdgesAndNodes ¶

func (g *Graph) SnapshotEdgesAndNodes() (map[NodeID][]*Edge, map[NodeID]*Node)

SnapshotEdgesAndNodes returns, under a single RLock, a complete snapshot of all outgoing edges by source node and all nodes by ID. This allows callers to perform graph traversal without per-node lock acquisitions.

func (*Graph) SnapshotFileStableIDs ¶

func (g *Graph) SnapshotFileStableIDs(file string)

SnapshotFileStableIDs records the stable UUIDs of all nodes in the given file so that MigrateStableID can reuse them after the file is re-parsed. Must be called BEFORE RemoveFile for the migration to work correctly.

func (*Graph) SnapshotImportAdjacency ¶

func (g *Graph) SnapshotImportAdjacency() (map[NodeID][]NodeID, map[NodeID]*Node)

SnapshotImportAdjacency returns, under a single RLock, a map of fileNodeID → []packageNodeID for all IMPORTS edges, plus a flat node map. This allows callers to build import lookup tables without per-node locking.

func (*Graph) SnapshotImportAliases ¶

func (g *Graph) SnapshotImportAliases() map[string]map[string]string

SnapshotImportAliases returns a snapshot of all per-file explicit import alias mappings. The outer map is file path → alias → importPath.

func (*Graph) SuggestRules ¶

func (g *Graph) SuggestRules() []SuggestedRule

SuggestRules detects high-density CALLS coupling between directory groups. It groups CALLS edges by their source and target directories, then surfaces directory pairs where ≥85% of nodes in the from-dir call into the to-dir (minimum 3 samples) as suggested architectural rules.

Must be called under g.mu.Lock() — used by ProjectIdentity() which already holds the write lock. Do NOT call g.mu.Lock() inside this method.

Returns up to 5 suggestions ordered by confidence descending.

func (*Graph) ToFlatGraph ¶

func (g *Graph) ToFlatGraph() *FlatGraph

ToFlatGraph converts the pointer-based Graph to a FlatGraph (SoA layout) for cache-friendly BFS traversal. The result is a snapshot — mutations to the original Graph are not reflected. Caller must hold no lock; this method acquires g.mu.RLock internally.

func (*Graph) UpdateFileNodeMetadata ¶

func (g *Graph) UpdateFileNodeMetadata(absFile string, update func(n *Node))

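UpdateFileNodeMetadata calls update(n) for every node whose File matches absFile, holding the graph write lock for the duration. The callback may safely read and write n.Metadata, but it should not make structural graph changes (adding or removing nodes or edges).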

func (*Graph) UpdateNodeMetadata ¶

func (g *Graph) UpdateNodeMetadata(id NodeID, update func(n *Node))

UpdateNodeMetadata applies update to the node with the given ID under the graph write lock. Safe for concurrent use. The callback may safely read and write n.Metadata — no structural graph changes (add/remove nodes or edges) should be made inside update. To update every node whose File matches a given path in one call, use UpdateFileNodeMetadata instead.

This is the correct way to write node metadata from a background goroutine while the MCP server is live: git I/O should happen before calling this method; the write lock is held only for the in-memory metadata writes (typically microseconds).

func (*Graph) UpsertRouteNode ¶

func (g *Graph) UpsertRouteNode(n *Node) bool

UpsertRouteNode atomically inserts a synthetic route node if one with the same ID does not already exist. Returns true if the node was newly created. This is the safe alternative to the non-atomic GetNode+AddNode pattern: by holding the write lock for both the existence check and the insert, two concurrent incremental-reindex goroutines cannot both create the same route with different StableIDs.

type GraphIndex ¶

// GraphIndex is a read-optimised, cache-friendly columnar view of the graph.
// It is rebuilt atomically after each parse cycle and never mutated in place —
// only tombstoned via MarkTombstone / replaced wholesale via RebuildIndex.
//
// Nodes are addressed by a sequential uint32 "seq" ID; seq 0 is the
// "null / not found" sentinel, so every per-node column is 1-indexed.
type GraphIndex struct {

	// Pool is the shared string interning pool (from intern.go).
	// All string columns below store StringID values.
	Pool *StringPool

	// Parallel node property slices — all indexed by sequential uint32 seq ID.
	// seq 0 is reserved as the "null / not found" sentinel.
	SeqIDs    []NodeID   // seq → original NodeID string
	Types     []StringID // seq → interned NodeType string
	Names     []StringID // seq → interned Name string
	FileIDs   []StringID // seq → interned File path string
	PkgIDs    []StringID // seq → interned Package string
	Lines     []int32    // seq → line number
	Exported  []bool     // seq → exported flag
	Tombstone []bool     // seq → true means node is deleted (pending compaction)

	// IDToSeq maps NodeID strings → seq for O(1) lookup in the BFS hot path.
	IDToSeq map[NodeID]uint32

	// CSR adjacency lists for outgoing edges.
	// Node with seq i has outgoing edges in OutTargets[OutStart[i]:OutEnd[i]].
	OutStart   []uint32   // len = node count + 2 (1-indexed, sentinel at 0)
	OutEnd     []uint32   // len = node count + 2
	OutTargets []uint32   // flattened target seq IDs
	OutTypes   []StringID // flattened edge type string IDs (parallel to OutTargets)

	// CSR adjacency lists for incoming edges (same layout, reversed direction).
	// By analogy with the outgoing lists, node with seq i has its incoming
	// edges in InTargets[InStart[i]:InEnd[i]]; the entries are the seq IDs of
	// the edge sources, and InTypes is the parallel edge-type column.
	InStart   []uint32
	InEnd     []uint32
	InTargets []uint32
	InTypes   []StringID

	// TombstoneCount tracks how many nodes are tombstoned.
	// If TombstoneCount/len(SeqIDs) > 0.15, the background compactor triggers.
	TombstoneCount int32 // atomic

	// EigenvectorCentrality stores the normalized (0–1) eigenvector centrality
	// for each node (1-indexed; position 0 is the sentinel, always 0.0).
	// Computed once during buildIndex() / LoadSnapshot() via power iteration on
	// the undirected adjacency.  Architecturally important nodes (connected to
	// other important nodes) get values close to 1.0; leaf/isolated nodes get 0.0.
	// Applied in CarveEgoGraph as: relevance × (1 + centralityBeta × centrality).
	EigenvectorCentrality []float64
	// contains filtered or unexported fields
}

GraphIndex is a read-optimised, cache-friendly columnar view of the graph. It is rebuilt atomically after each parse cycle and never mutated in place — only tombstoned via MarkTombstone / replaced wholesale via RebuildIndex.

func LoadSnapshot ¶

func LoadSnapshot(data []byte, pool *StringPool) (*GraphIndex, error)

LoadSnapshot deserialises a zstd-compressed byte slice produced by SaveSnapshot and returns a ready GraphIndex. The provided pool is reused so strings already interned during a previous session share the same memory.

func (*GraphIndex) InNeighbours ¶

func (idx *GraphIndex) InNeighbours(seq uint32) (sources []uint32, types []StringID)

InNeighbours returns the slices of incoming (source seq, edge type StringID) values for node seq.

func (*GraphIndex) IsTombstoned ¶

func (idx *GraphIndex) IsTombstoned(seq uint32) bool

IsTombstoned returns true if the node at seq has been logically deleted.

func (*GraphIndex) MarkTombstone ¶

func (idx *GraphIndex) MarkTombstone(seq uint32)

MarkTombstone logically deletes node seq (e.g. when its source file is edited). The node remains in the slice arrays until the next compaction sweep.

func (*GraphIndex) NodeFile ¶

func (idx *GraphIndex) NodeFile(seq uint32) string

NodeFile returns the interned File path string for seq.

func (*GraphIndex) NodeName ¶

func (idx *GraphIndex) NodeName(seq uint32) string

NodeName returns the interned Name string for seq.

func (*GraphIndex) OutNeighbours ¶

func (idx *GraphIndex) OutNeighbours(seq uint32) (targets []uint32, types []StringID)

OutNeighbours returns the slice of outgoing (target seq, edge type StringID) values for node seq. The returned slices are direct subslices of internal arrays — callers must not modify them.

func (*GraphIndex) Ready ¶

func (idx *GraphIndex) Ready() bool

Ready returns true if the index has been built and is safe for BFS reads.

func (*GraphIndex) ReceiverMethodSeqs ¶

func (idx *GraphIndex) ReceiverMethodSeqs(receiverName string) []uint32

ReceiverMethodSeqs returns seq IDs of methods whose receiver matches the given name (case-insensitive). Used by CarveEgoGraph to seed BFS with struct/interface methods without scanning all nodes. The caller MUST already hold g.mu.RLock — this method does no locking.

func (*GraphIndex) SaveSnapshot ¶

func (idx *GraphIndex) SaveSnapshot() ([]byte, error)

SaveSnapshot serialises idx to a zstd-compressed byte slice. The caller is responsible for persisting the bytes (e.g. in the SQLite meta table).

func (*GraphIndex) Seq ¶

func (idx *GraphIndex) Seq(nid NodeID) uint32

Seq returns the sequential uint32 ID for nid, or 0 (sentinel) if not found.

func (*GraphIndex) TombstoneRatio ¶

func (idx *GraphIndex) TombstoneRatio() float64

TombstoneRatio returns the fraction of nodes that are tombstoned. Used by the background compactor to decide whether to trigger a rebuild.

func (*GraphIndex) UnsafeInNeighbours ¶

func (idx *GraphIndex) UnsafeInNeighbours(seq uint32) (sources []uint32, types []StringID)

UnsafeInNeighbours returns incoming neighbours without acquiring the RLock. Same safety requirements as UnsafeSeq.

func (*GraphIndex) UnsafeIsTombstoned ¶

func (idx *GraphIndex) UnsafeIsTombstoned(seq uint32) bool

UnsafeIsTombstoned checks the tombstone flag without acquiring the RLock. Same safety requirements as UnsafeSeq.

func (*GraphIndex) UnsafeOutNeighbours ¶

func (idx *GraphIndex) UnsafeOutNeighbours(seq uint32) (targets []uint32, types []StringID)

UnsafeOutNeighbours returns outgoing neighbours without acquiring the RLock. Same safety requirements as UnsafeSeq.

func (*GraphIndex) UnsafeSeq ¶

func (idx *GraphIndex) UnsafeSeq(nid NodeID) uint32

UnsafeSeq returns the sequential ID for nid without acquiring the RLock. The caller MUST guarantee that the index is immutable (ready == 1) and hold g.mu.RLock to prevent concurrent MarkTombstone writes.

type GraphSummary ¶

// GraphSummary contains aggregate counts across the whole graph.
//
// NOTE(review): the field names mirror the NodeType kinds (file, package,
// function, method, struct, interface), so each is presumably the number of
// nodes of that kind, and Edges the total number of edges — confirm against
// the code that populates this struct.
type GraphSummary struct {
	Files      int `json:"files"`
	Packages   int `json:"packages"`
	Functions  int `json:"functions"`
	Methods    int `json:"methods"`
	Structs    int `json:"structs"`
	Interfaces int `json:"interfaces"`
	Edges      int `json:"edges"`
}

GraphSummary contains aggregate counts across the whole graph.

type ImpactResult ¶

// ImpactResult is returned by ImpactAnalysis. It describes the blast radius
// of a change to a single root entity: code callers grouped into tiers, the
// tests that exercise the root, and any cross-domain entities connected to it.
type ImpactResult struct {
	// Root identifies the entity whose impact was analysed.
	Root EntityRef `json:"root"`
	// Tiers groups the affected nodes by blast-radius hop distance.
	Tiers []ImpactTier `json:"tiers"`
	// TotalAffected counts the code-caller tier nodes; cross-domain entities
	// are counted separately in CrossDomainAffected.
	TotalAffected int `json:"total_affected"`
	// AffectedFiles lists files touched by the affected nodes.
	// NOTE(review): presumably de-duplicated file paths of the tier nodes — confirm.
	AffectedFiles []string `json:"affected_files"`
	// Truncated is true when any tier was capped at maxImpactNodesPerTier.
	// Check per-tier Truncated + TotalNodes for exact counts.
	Truncated bool `json:"truncated,omitempty"`
	// TestCoverage lists test files that exercise the root entity (R2).
	// Populated by FindTestsFor via reverse-BFS over CALLS edges filtered to test files.
	TestCoverage []string `json:"test_coverage,omitempty"`
	// APISurface flags whether the root entity is part of the public API surface
	// and lists external consumers. Populated when root is exported.
	APISurface *APISurfaceInfo `json:"api_surface,omitempty"`
	// TestPriority provides distance-scored test files: "critical" tests (distance 1)
	// are most likely to break, "likely" (distance 2) somewhat likely, "peripheral" (3+)
	// less likely. Sorted by distance ascending.
	TestPriority []TestRef `json:"test_priority,omitempty"`
	// ImplementorImpact lists types that implement the root interface/type.
	// When an interface method signature changes, all implementors must update.
	// Populated when root is an interface node or method on an interface.
	ImplementorImpact []EntityRef `json:"implementor_impact,omitempty"`
	// CrossDomainImpact lists entities in other knowledge domains that are
	// directly connected to the root via cross-domain edges (DEPLOYS, CONSUMES,
	// CONFIGURED_BY, DOCUMENTS, MENTIONS, MANUAL). Only edges with confidence ≥ 0.6
	// or confirmed are included — this is enforced at edge-injection time so all
	// edges present in the in-memory graph already satisfy the threshold.
	// Sprint 16 #5: the killer feature — "what infra/API/docs does this touch?"
	CrossDomainImpact []CrossDomainRef `json:"cross_domain_impact,omitempty"`
	// CrossDomainAffected is the count of cross-domain entities in CrossDomainImpact.
	// Kept separate from TotalAffected (which counts code-caller tier nodes) so
	// callers can distinguish code blast-radius from cross-domain blast-radius.
	CrossDomainAffected int `json:"cross_domain_affected,omitempty"`
	// CrossDomainTruncated is true when CrossDomainImpact was capped at
	// maxCrossDomainImpactNodes (100). The full count is not available.
	CrossDomainTruncated bool `json:"cross_domain_truncated,omitempty"`
}

ImpactResult is returned by ImpactAnalysis.

type ImpactTier ¶

// ImpactTier groups nodes at the same blast-radius hop distance.
//
// NOTE(review): the three Label values and the three Confidence values are
// listed in the same order, so they presumably pair up as
// direct=1.0, indirect=0.6, peripheral=0.3 — confirm.
type ImpactTier struct {
	// Depth is the hop distance shared by every node in this tier.
	Depth      int         `json:"depth"`
	Label      string      `json:"label"`      // "direct" | "indirect" | "peripheral"
	Confidence float64     `json:"confidence"` // 1.0 / 0.6 / 0.3
	// Nodes lists the entities in this tier; it may be capped (see Truncated).
	Nodes      []EntityRef `json:"nodes"`
	Truncated  bool        `json:"truncated,omitempty"`   // true when nodes were capped
	TotalNodes int         `json:"total_nodes,omitempty"` // actual count before cap
}

ImpactTier groups nodes at the same blast-radius hop distance.

type Node ¶

// Node represents a single code entity in the graph.
type Node struct {
	// ID is the composite node identifier (see NodeID).
	ID NodeID `json:"id"`
	// Type classifies what kind of entity this node is (see NodeType).
	Type NodeType `json:"type"`
	// Name, Package, File and Line locate the entity.
	// NOTE(review): File is presumably an absolute path (UpdateFileNodeMetadata
	// matches it against an absFile argument) — confirm.
	Name    string `json:"name"`
	Package string `json:"package"`
	File    string `json:"file"`
	Line    int    `json:"line"`
	// Exported is the node's exported flag.
	Exported bool `json:"exported"`
	// Metadata holds free-form string key/value annotations; omitted from
	// JSON when empty.
	Metadata map[string]string `json:"metadata,omitempty"`
	// StableID is a UUID v4 assigned on first creation and preserved across
	// file renames and incremental re-parses. It provides a stable cross-project
	// reference that does not change when a file is moved. Generated by
	// Graph.AddNode if empty; migrated by Watcher.reparseFile via MigrateStableID.
	StableID string `json:"stable_id,omitempty"`
	// Provenance classifies the trust tier of this node's source file.
	// Derived at index time. The zero value is the empty string, which is
	// described as the user-authored default; note that the
	// ProvenanceUserAuthored constant itself is "user-authored", so an unset
	// field does not compare equal to it.
	// NOTE(review): confirm how readers treat "" versus "user-authored".
	// Used by BFS ranking (user-authored nodes surface first) and as a
	// Semantic Firewall gate on high-privilege operations.
	Provenance ProvenanceType `json:"provenance,omitempty"`
	// Domain classifies which knowledge domain this node belongs to.
	// Defaults to DomainCode ("code") for all source-code entities.
	// Future domain parsers (infra, api, docs, issues) set this at index time
	// so that non-code nodes coexist in the same graph without ambiguity.
	// An empty string is treated as DomainCode everywhere in the codebase.
	Domain DomainType `json:"domain,omitempty"`
}

Node represents a single code entity in the graph.

type NodeID ¶

// NodeID is a composite identifier with the format "repoID::file::name".
// Using a named type (not a plain string) enforces intent at compile time.
type NodeID string

NodeID is a composite identifier with the format: "repoID::file::name". Using a named type (not a plain string) enforces intent at compile time.

type NodeIndex ¶

// NodeIndex is a dense integer ID assigned sequentially to each parsed node.
// It replaces the string-based NodeID in the core engine.
type NodeIndex uint32

NodeIndex is a dense integer ID assigned sequentially to each parsed node. This replaces the string-based NodeID ("repoID::file::name") in the core engine.

type NodeType ¶

// NodeType classifies what kind of code entity a node represents.
type NodeType string

NodeType classifies what kind of code entity a node represents.

// Node type constants: classify what kind of code entity a node represents.
// Each value is the lowercase string form of the kind.
const (
	// Kinds named directly after the entity they classify.
	NodeFile      NodeType = "file"
	NodePackage   NodeType = "package"
	NodeFunction  NodeType = "function"
	NodeMethod    NodeType = "method"
	NodeStruct    NodeType = "struct"
	NodeInterface NodeType = "interface"
	NodeVariable  NodeType = "variable"
	// NodeRoute is a virtual node injected by the heuristic pass (R1).
	// It represents an HTTP/RPC route registration (e.g. "GET /api/users").
	// Not present in the AST — synthesised from framework registration patterns.
	NodeRoute NodeType = "route"
	// NodeSection is a documentation section extracted from a markdown file (R31).
	// Each ATX heading (# through ######) becomes a Section node with metadata:
	// title, depth (1-6), body_preview (first 200 chars), body (up to 2000 chars).
	NodeSection NodeType = "section"

	// NodeConcept is an abstract idea, algorithm, pattern, or methodology
	// extracted from documentation (e.g. "token bucket", "eventual consistency").
	NodeConcept NodeType = "concept"

	// NodeEntity is a named real-world entity extracted from documentation
	// (e.g. a person, organization, product, or external system).
	NodeEntity NodeType = "entity"

	// NodeArtifact is a document, specification, standard, or law referenced
	// in documentation (e.g. "RFC 7519", "OpenAPI 3.0 spec", "GDPR Article 17").
	NodeArtifact NodeType = "artifact"

	// NodeDecision is an architectural or design decision extracted from docs
	// (e.g. from ADR-style prose, decision logs, or architecture notes).
	NodeDecision NodeType = "decision"
)

Node type constants: classify what kind of code entity a node represents.

type ProjectIdentity ¶

// ProjectIdentity is the compact architectural summary returned by
// get_project_identity.
type ProjectIdentity struct {
	// RepoID identifies the repository this summary describes.
	RepoID string `json:"repo_id"`
	// Summary holds aggregate counts across the whole graph.
	Summary GraphSummary `json:"summary"`
	// EntryPoints and KeyEntities highlight notable entities of the project.
	// NOTE(review): the selection criteria are not visible here — confirm
	// against the code that builds this struct.
	EntryPoints []EntityRef  `json:"entry_points"`
	KeyEntities []EntityInfo `json:"key_entities"`
	// SuggestedRules lists detected high-density coupling patterns that the
	// team may want to formalise as explicit rules; omitted when empty.
	SuggestedRules []SuggestedRule `json:"suggested_rules,omitempty"`
	// Scale is the repo size tier, computed from semantic node count.
	Scale Scale `json:"scale"`
	// ToolGuidance is a scale-aware recommendation for agents on which tools to prefer.
	ToolGuidance string `json:"tool_guidance"`
}

ProjectIdentity is the compact architectural summary returned by get_project_identity.

type ProvenanceType ¶

// ProvenanceType classifies the trust tier of a graph node. Derived at index
// time from file path patterns and content headers — no LLM needed.
type ProvenanceType string

ProvenanceType classifies the trust tier of a graph node. Derived at index time from file path patterns and content headers — no LLM needed.

// Provenance tiers that can be assigned to a graph node.
const (
	// ProvenanceUserAuthored is the default: files written by the user/team.
	// NOTE(review): the value is "user-authored", whereas an unset
	// Node.Provenance field is the empty string — confirm that readers treat
	// "" as equivalent to this constant.
	ProvenanceUserAuthored ProvenanceType = "user-authored"
	// ProvenanceGenerated marks auto-generated files (protobuf, codegen, mocks).
	ProvenanceGenerated ProvenanceType = "generated"
	// ProvenanceVendored marks third-party dependency files (vendor/, node_modules/).
	ProvenanceVendored ProvenanceType = "vendored"
	// ProvenanceExternal marks content ingested from the web via scout sidecar.
	//
	// ARCHITECTURAL NOTE: This constant is defined and wired into the BFS weight
	// system (weight 0.2 — lowest tier) and the digest display layer, but it is
	// never set by any current code path. web_annotate() attaches web findings as
	// annotations on existing graph nodes — it does not create new NodeWebContent
	// nodes. A future implementation would create dedicated web-content nodes
	// tagged ProvenanceExternal when ingesting scout results. Until then this
	// constant is intentionally unused — do not remove it.
	ProvenanceExternal ProvenanceType = "external"
)

type QualityNode ¶

// QualityNode carries the graph identity and file context for a single node
// passed to CarveConfig.QualityScoreLookup.
//
// Name and File are included so that lookup closures can build their own
// entity-with-path key without calling Graph.GetNode: CarveEgoGraph already
// holds g.mu.RLock when it invokes QualityScoreLookup, and re-acquiring the
// lock from inside the callback could deadlock.
type QualityNode struct {
	ID   NodeID // graph identity of the node
	Name string // node name
	File string // file the node belongs to
}

QualityNode carries the graph identity and file context for a single node passed to CarveConfig.QualityScoreLookup. Name and File allow closures to convert to entityWithPath format without calling Graph.GetNode — which would attempt to re-acquire g.mu.RLock and potentially deadlock because CarveEgoGraph already holds the lock when it invokes QualityScoreLookup.

type Scale ¶

// Scale classifies a project's size based on semantic node count
// (functions + methods + structs + interfaces). Used to give agents
// scale-aware guidance on when to prefer Synapses tools vs direct file access.
type Scale string

Scale classifies a project's size based on semantic node count (functions + methods + structs + interfaces). Used to give agents scale-aware guidance on when to prefer Synapses tools vs direct file access.

// Size tiers, ordered from smallest to largest by semantic node count.
const (
	// ScaleMicro represents projects <100 semantic nodes — Read/Grep often faster.
	ScaleMicro Scale = "micro"
	// ScaleSmall represents projects with 100–499 nodes — prefer Synapses for exploration.
	ScaleSmall Scale = "small"
	// ScaleMedium represents projects with 500–1999 nodes — strongly prefer Synapses tools.
	ScaleMedium Scale = "medium"
	// ScaleLarge represents projects with 2000+ nodes — always use Synapses tools.
	ScaleLarge Scale = "large"
)

type StringID ¶

// StringID is a compact index into the StringPool.
type StringID uint32

StringID is a compact index into the StringPool.

type StringPool ¶

// StringPool implements a bi-directional mapping between strings and uint32
// IDs. It leverages the Go 1.23 `unique` package to ensure that identical
// strings share the same underlying memory allocation across the entire
// application, reducing heap usage in large repositories.
//
// StringPool is safe for concurrent use.
type StringPool struct {
	// contains filtered or unexported fields
}

StringPool implements a bi-directional mapping between strings and uint32 IDs. It leverages the Go 1.23 `unique` package to ensure that identical strings share the same underlying memory allocation across the entire application, massively reducing heap usage in large repositories.

StringPool is safe for concurrent use.

func NewStringPool ¶

func NewStringPool() *StringPool

NewStringPool creates a new, empty string interning pool.

func (*StringPool) Intern ¶

func (p *StringPool) Intern(s string) StringID

Intern takes a raw string, deduplicates its memory using the `unique` package, and returns a compact StringID. If the string has already been interned, it returns the existing ID.

func (*StringPool) Value ¶

func (p *StringPool) Value(id StringID) string

Value looks up the string associated with the given StringID. It handles both properly interned strings and transient "Ghost" strings.

type SubGraph ¶

// SubGraph is the result of a context carve: a relevance-ranked slice of the
// graph.
type SubGraph struct {
	// Root is the ID of the node the carve started from.
	Root           NodeID       `json:"root"`
	// Nodes holds the carved nodes.
	// NOTE(review): presumably ordered by relevance, per the type's
	// "relevance-ranked" description — confirm.
	Nodes          []CarvedNode `json:"nodes"`
	// Edges holds the edges included in the carve.
	Edges          []*Edge      `json:"edges"`
	Truncated      bool         `json:"truncated,omitempty"`       // true when token budget cut BFS results
	TruncatedCount int          `json:"truncated_count,omitempty"` // number of nodes dropped by budget
}

SubGraph is the result of a context carve: a relevance-ranked slice of the graph.

type SuggestedRule ¶

// SuggestedRule is a detected high-density structural coupling pattern.
// Returned in get_project_identity to surface architectural conventions that
// the team may want to formalise as explicit forbidden-edge rules.
type SuggestedRule struct {
	// ID is a stable slug derived from the directory pair.
	ID string `json:"id"`
	// Description is a human-readable summary including sample counts.
	Description string `json:"description"`
	// Confidence is the fraction of from-dir nodes that call into to-dir (0–1).
	Confidence float64 `json:"confidence"`
	// SampleCount is the number of distinct from-dir nodes that exhibit the pattern.
	SampleCount int `json:"sample_count"`
	// FromDirPattern is a glob suitable for use as from_file_pattern in a rule.
	FromDirPattern string `json:"from_dir_pattern"`
	// ToDirPattern is a glob suitable for use as to_file_pattern in a rule.
	ToDirPattern string `json:"to_dir_pattern"`
	// EdgeType is the type of coupling detected (always EdgeCalls for now).
	EdgeType EdgeType `json:"edge_type"`
}

SuggestedRule is a detected high-density structural coupling pattern. Returned in get_project_identity to surface architectural conventions that the team may want to formalise as explicit forbidden-edge rules.

type TerraformRef ¶

// TerraformRef records an unresolved Terraform resource reference encountered
// during .tf file parsing. The resolver drains these after all files are
// parsed and creates DEPENDS_ON edges between resource nodes, which enables
// cross-file dependency resolution (a resource in vpc.tf can depend on one in
// compute.tf).
type TerraformRef struct {
	FromID   NodeID // node ID of the resource containing the reference
	FromFile string // absolute path of the .tf file containing the reference
	RefName  string // target resource name: "type.name" or "data.type.name" or "module.name"
}

TerraformRef records an unresolved Terraform resource reference encountered during .tf file parsing. The resolver drains these after all files are parsed and creates DEPENDS_ON edges between resource nodes. This enables cross-file dependency resolution: a resource in vpc.tf can depend on one in compute.tf.

type TestRef ¶

// TestRef represents a test file with its distance from the changed entity.
// As described on ImpactResult.TestPriority, distance 1 is "critical",
// distance 2 is "likely" and distance 3+ is "peripheral".
type TestRef struct {
	// File is the test file.
	File     string `json:"file"`
	Distance int    `json:"distance"` // BFS hops from changed entity
	Priority string `json:"priority"` // TestPriorityCritical, TestPriorityLikely, TestPriorityPeripheral
}

TestRef represents a test file with its distance from the changed entity.

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL