Documentation
¶
Overview ¶
Package graph implements the core in-memory graph engine for Synapses. It stores code entities (nodes) and their relationships (edges), and provides BFS-based context carving with edge-type-weighted relevance decay.
Index ¶
- Constants
- Variables
- func CrossDomainCategory(et EdgeType) string
- func ExportDOT(nodes []*Node, edges []*Edge, repoRoot string, includeMeta bool) string
- func ExportGraphML(nodes []*Node, edges []*Edge, repoRoot string) string
- func ExportMermaid(nodes []*Node, edges []*Edge, repoRoot string, includeMeta bool) string
- func IntentCarveWeights(intent string) map[EdgeType]float64
- func IntentDirectionBoost(intent string) float64
- func IsCrossDomainEdge(et EdgeType) bool
- func NodeTypeToUint8(nt NodeType) uint8
- func Serialize(fg *FlatGraph, w io.Writer) error
- type APISurfaceInfo
- type BulkEdge
- type CallSite
- type CarveConfig
- type CarvedNode
- type CrossDomainContext
- type CrossDomainRef
- type DomainType
- type Edge
- type EdgeIndex
- type EdgeType
- type EdgeTypeDescriptor
- type EdgeWeightKey
- type EntityInfo
- type EntityRef
- type ErrNodeNotFound
- type FlatGraph
- func (fg *FlatGraph) AddNode(name StringID, nodeType NodeType, fileID StringID, nsID uint16) NodeIndex
- func (fg *FlatGraph) BulkAddEdges(edges []BulkEdge) int
- func (fg *FlatGraph) ExtID(idx NodeIndex) NodeID
- func (fg *FlatGraph) LookupIndex(id NodeID) (NodeIndex, bool)
- func (fg *FlatGraph) Neighbors(idx NodeIndex) []NodeIndex
- func (fg *FlatGraph) NodeIDAt(idx NodeIndex) NodeID
- type Graph
- func (g *Graph) AddCallSite(cs CallSite)
- func (g *Graph) AddEdge(e *Edge)
- func (g *Graph) AddImportAlias(file, alias, importPath string)
- func (g *Graph) AddInstantiatedType(file, typeName string)
- func (g *Graph) AddNode(n *Node)
- func (g *Graph) AddTerraformRef(ref TerraformRef)
- func (g *Graph) AddVarType(file, varName, typeName string)
- func (g *Graph) AllEdges() []*Edge
- func (g *Graph) AllNodes() []*Node
- func (g *Graph) BulkAddCallSites(sites []CallSite)
- func (g *Graph) CacheLen() int
- func (g *Graph) CarveEgoGraph(rootID NodeID, cfg CarveConfig) (*SubGraph, error)
- func (g *Graph) ClearFileSnapshot(file string)
- func (g *Graph) Compact()
- func (g *Graph) CrossDomainImpactForNode(nodeID NodeID) ([]CrossDomainRef, bool)
- func (g *Graph) CrossRepoCalls(primaryRepoID string) (crossCallCount int, linkedRepos []string)
- func (g *Graph) DirectNeighbors(id NodeID) []NodeID
- func (g *Graph) DrainCallSites() []CallSite
- func (g *Graph) DrainTerraformRefs() []TerraformRef
- func (g *Graph) EdgeCount() int
- func (g *Graph) EdgeCountsByType() map[EdgeType]int
- func (g *Graph) EdgesForFile(file string) []*Edge
- func (g *Graph) EnableFlatGraph()
- func (g *Graph) Fanin(id NodeID) int
- func (g *Graph) Fanout(id NodeID) int
- func (g *Graph) FindByFile(filePath string) []*Node
- func (g *Graph) FindByName(name string) []*Node
- func (g *Graph) FindByPattern(pattern string) []*Node
- func (g *Graph) FindByPatternLimit(pattern string, limit int) []*Node
- func (g *Graph) FindByType(t NodeType) []*Node
- func (g *Graph) FindTestsFor(nodeID NodeID) []string
- func (g *Graph) FindTestsWithDistance(nodeID NodeID) []TestRef
- func (g *Graph) GetImportAliases(file string) map[string]string
- func (g *Graph) GetInstantiatedTypes() map[string]bool
- func (g *Graph) GetNode(id NodeID) *Node
- func (g *Graph) GetVarTypes(file string) map[string]string
- func (g *Graph) HasEdge(from, to NodeID, edgeType EdgeType) bool
- func (g *Graph) ImpactAnalysis(rootID NodeID, maxDepth int) (*ImpactResult, error)
- func (g *Graph) InEdges(id NodeID) []*Edge
- func (g *Graph) InEdgesForFile(file string) []*Edge
- func (g *Graph) Index() *GraphIndex
- func (g *Graph) InvalidateCache()
- func (g *Graph) InvalidateCacheForFile(file string)
- func (g *Graph) MakeNodeID(file, name string) NodeID
- func (g *Graph) MergeFrom(other *Graph)
- func (g *Graph) MigrateStableID(n *Node)
- func (g *Graph) NodeCount() int
- func (g *Graph) NodeCountsByDomain() map[DomainType]int
- func (g *Graph) NodesForFile(file string) []*Node
- func (g *Graph) OutEdges(id NodeID) []*Edge
- func (g *Graph) OutEdgesForFile(file string) []*Edge
- func (g *Graph) PeekCallSites() []CallSite
- func (g *Graph) ProjectIdentity() *ProjectIdentity
- func (g *Graph) RebuildIndex() ([]byte, error)
- func (g *Graph) RemoveCallSitesForFile(file string)
- func (g *Graph) RemoveEdge(from, to NodeID, edgeType EdgeType)
- func (g *Graph) RemoveFile(file string)
- func (g *Graph) RemoveTerraformRefsForFile(file string)
- func (g *Graph) RepoID() string
- func (g *Graph) Root() string
- func (g *Graph) SetFileProvenance(filePath string, p ProvenanceType)
- func (g *Graph) SetIndex(idx *GraphIndex)
- func (g *Graph) SetRoot(root string)
- func (g *Graph) SnapshotCallsAdjacency() map[NodeID][]NodeID
- func (g *Graph) SnapshotEdgesAndNodes() (map[NodeID][]*Edge, map[NodeID]*Node)
- func (g *Graph) SnapshotFileStableIDs(file string)
- func (g *Graph) SnapshotImportAdjacency() (map[NodeID][]NodeID, map[NodeID]*Node)
- func (g *Graph) SnapshotImportAliases() map[string]map[string]string
- func (g *Graph) SuggestRules() []SuggestedRule
- func (g *Graph) ToFlatGraph() *FlatGraph
- func (g *Graph) UpdateFileNodeMetadata(absFile string, update func(n *Node))
- func (g *Graph) UpdateNodeMetadata(id NodeID, update func(n *Node))
- func (g *Graph) UpsertRouteNode(n *Node) bool
- type GraphIndex
- func (idx *GraphIndex) InNeighbours(seq uint32) (sources []uint32, types []StringID)
- func (idx *GraphIndex) IsTombstoned(seq uint32) bool
- func (idx *GraphIndex) MarkTombstone(seq uint32)
- func (idx *GraphIndex) NodeFile(seq uint32) string
- func (idx *GraphIndex) NodeName(seq uint32) string
- func (idx *GraphIndex) OutNeighbours(seq uint32) (targets []uint32, types []StringID)
- func (idx *GraphIndex) Ready() bool
- func (idx *GraphIndex) ReceiverMethodSeqs(receiverName string) []uint32
- func (idx *GraphIndex) SaveSnapshot() ([]byte, error)
- func (idx *GraphIndex) Seq(nid NodeID) uint32
- func (idx *GraphIndex) TombstoneRatio() float64
- func (idx *GraphIndex) UnsafeInNeighbours(seq uint32) (sources []uint32, types []StringID)
- func (idx *GraphIndex) UnsafeIsTombstoned(seq uint32) bool
- func (idx *GraphIndex) UnsafeOutNeighbours(seq uint32) (targets []uint32, types []StringID)
- func (idx *GraphIndex) UnsafeSeq(nid NodeID) uint32
- type GraphSummary
- type ImpactResult
- type ImpactTier
- type Node
- type NodeID
- type NodeIndex
- type NodeType
- type ProjectIdentity
- type ProvenanceType
- type QualityNode
- type Scale
- type StringID
- type StringPool
- type SubGraph
- type SuggestedRule
- type TerraformRef
- type TestRef
Constants ¶
// Test priority constants for FindTestsWithDistance. Each tier corresponds to
// the call distance noted beside it.
const (
	TestPriorityCritical   = "critical"   // distance 1: directly calls changed entity
	TestPriorityLikely     = "likely"     // distance 2: calls through one intermediate
	TestPriorityPeripheral = "peripheral" // distance 3+: transitive dependency
)
Test priority constants for FindTestsWithDistance.
// MaxPoolSize is the upper bound on the number of interned strings. Once it is
// reached, Intern returns a ghost ID rather than growing the pool further.
const MaxPoolSize = 5_000_000
MaxPoolSize is the upper bound on the number of interned strings. Intern returns a ghost ID once this limit is reached, preventing unbounded memory growth in extremely large repositories.
// ReservedGhostRange is the number of StringIDs reserved at the beginning of
// the pool for transient or unindexed strings (ghost nodes).
const ReservedGhostRange = 1000
ReservedGhostRange is the number of StringIDs reserved at the beginning of the pool for transient or unindexed strings (Ghost Nodes). This prevents out-of-bounds panics when an agent requests a file that hasn't been saved to the SQLite BLOB yet.
Variables ¶
// DefaultEdgeWeights assigns each edge type its default semantic weight.
// A higher weight makes the edge more relevant when carving context.
// One entry per line so that weight changes show up as single-line diffs.
var DefaultEdgeWeights = map[EdgeType]float64{
	EdgeCalls:        1.0,
	EdgeDataFlows:    0.95,
	EdgeImplements:   0.9,
	EdgeEmbeds:       0.85,
	EdgeDependsOn:    0.8,
	EdgeImports:      0.7,
	EdgeExports:      0.5,
	EdgeDefines:      0.15,
	EdgeHandles:      0.9,
	EdgeContains:     0.15,
	EdgeExplains:     0.7,
	EdgeDocumentedBy: 0.6,
	EdgeLinksTo:      0.3,
	EdgeManual:       0.5,
	EdgeDeploys:      0.75,
	EdgeConsumes:     0.75,
	EdgeConfiguredBy: 0.65,
	EdgeDocuments:    0.65,
	EdgeMentions:     0.55,
	EdgeContradicts:  0.6,
	EdgeCausedBy:     0.5,
	EdgeInstanceOf:   0.4,
	EdgeRelatesTo:    0.3,
}
DefaultEdgeWeights defines the semantic significance of each edge type. Higher weight = more relevant when carving context. Configurable via synapses.json.
EdgeDefines is intentionally low (0.15) because file→entity DEFINES edges would otherwise turn every file node into a high-relevance hub, equalising all siblings in a file with equal — and misleading — relevance scores.
var EdgeTypeCatalog = []EdgeTypeDescriptor{ { Name: EdgeCalls, Description: "Function or method invocation. Direction: caller → callee. Highest BFS weight — runtime behaviour flows along CALLS edges.", SemanticWeight: 1.0, Direction: "directed", Domain: DomainCode, }, { Name: EdgeDataFlows, Description: "Data dependency between entities: a value produced by one entity is consumed by another. Near-highest weight — data-flow edges are critical for debugging and impact analysis.", SemanticWeight: 0.95, Direction: "directed", Domain: DomainCode, }, { Name: EdgeImplements, Description: "Struct or type implements an interface. Direction: concrete type → interface. High weight — interface compliance is central to code review and contract analysis.", SemanticWeight: 0.9, Direction: "directed", Domain: DomainCode, }, { Name: EdgeHandles, Description: "HTTP/RPC route dispatches to a handler function. Direction: route node → handler. Injected by the R1 heuristic pass (not AST-derived). Confidence stored in route node metadata.", SemanticWeight: 0.9, Direction: "directed", Domain: DomainCode, Synthetic: true, }, { Name: EdgeEmbeds, Description: "Struct embeds another struct (Go embedding / composition). Direction: outer struct → embedded struct. High weight — embedding propagates the full method set.", SemanticWeight: 0.85, Direction: "directed", Domain: DomainCode, }, { Name: EdgeDependsOn, Description: "Explicit dependency relationship between entities or modules. Broader than CALLS — captures package-level or declarative dependencies not visible as direct call sites.", SemanticWeight: 0.8, Direction: "directed", Domain: DomainCode, }, { Name: EdgeDeploys, Description: "Code entity deploys an infrastructure resource. Direction: code entity → Terraform/k8s resource node. 
Strong cross-domain dependency — code changes may break deployed infrastructure.", SemanticWeight: 0.75, Direction: "directed", Domain: DomainInfra, Synthetic: true, }, { Name: EdgeConsumes, Description: "Code entity calls or depends on an API endpoint or service. Direction: code entity → OpenAPI endpoint / gRPC service node. Strong cross-domain dependency — API changes break consuming code.", SemanticWeight: 0.75, Direction: "directed", Domain: DomainAPI, Synthetic: true, }, { Name: EdgeImports, Description: "Source file or package imports another package. Direction: importer → imported package node. Lower weight than CALLS — import edges are structurally noisy (every file that uses a stdlib type gets an edge).", SemanticWeight: 0.7, Direction: "directed", Domain: DomainCode, }, { Name: EdgeExplains, Description: "Documentation section describes a code entity (R31). Direction: Section node → code entity. Moderate weight — doc context is valuable but secondary to structural code edges.", SemanticWeight: 0.7, Direction: "directed", Domain: DomainDocs, Synthetic: true, }, { Name: EdgeConfiguredBy, Description: "Code entity is controlled by a configuration resource. Direction: code entity → Terraform variable / k8s ConfigMap / config file node. Cross-domain — config changes can silently break code behaviour.", SemanticWeight: 0.65, Direction: "directed", Domain: DomainInfra, Synthetic: true, }, { Name: EdgeDocuments, Description: "Documentation section describes a cross-domain entity (broader than EXPLAINS). Direction: docs section → any entity (code, infra, API). Used for README sections that describe Terraform modules or API specs.", SemanticWeight: 0.65, Direction: "directed", Domain: DomainDocs, Synthetic: true, }, { Name: EdgeDocumentedBy, Description: "Reverse of EXPLAINS: code entity references its documentation section (R31). Direction: code entity → Section node. 
Slightly lower than EXPLAINS so code-to-code edges are preferred under token budget pressure.", SemanticWeight: 0.6, Direction: "directed", Domain: DomainDocs, Synthetic: true, }, { Name: EdgeContradicts, Description: "Sprint 17 NL-to-graph: two entities express conflicting information (e.g. two doc sections with incompatible claims). Direction: newer/conflicting entity → established entity. Weight reflects the high signal value of detected contradictions.", SemanticWeight: 0.6, Direction: "directed", Domain: DomainKnowledge, Synthetic: true, }, { Name: EdgeMentions, Description: "Synthetic cross-domain name-match edge. Direction: any entity → any entity across domain boundary. Created by the name-matching background pass when two entities share the same identifier across domains. Confidence (0.0–1.0) stored in edge metadata; only edges with confidence ≥ 0.6 are auto-created.", SemanticWeight: 0.55, Direction: "directed", Domain: DomainKnowledge, Synthetic: true, }, { Name: EdgeExports, Description: "Module or file exports an identifier. Direction: file/module → exported symbol. Medium-low weight — captures public API surface without dominating BFS traversal.", SemanticWeight: 0.5, Direction: "directed", Domain: DomainCode, }, { Name: EdgeManual, Description: "User-defined cross-domain relationship created via link_entities. Used when no standard edge type applies. Medium BFS weight (0.5) — traversed but lower priority than structural code edges.", SemanticWeight: 0.5, Direction: "directed", Domain: DomainCustom, Synthetic: true, }, { Name: EdgeCausedBy, Description: "Sprint 17 NL-to-graph: causal relationship between entities extracted from documentation. Direction: effect → cause (e.g. OutOfMemoryError caused_by LeakedConnection). 
Enables root-cause traversal in knowledge graph queries.", SemanticWeight: 0.5, Direction: "directed", Domain: DomainKnowledge, Synthetic: true, }, { Name: EdgeInstanceOf, Description: "Sprint 17 NL-to-graph: type hierarchy relationship extracted from documentation. Direction: specific → general (e.g. Redis instance_of CacheSystem). Lower weight than structural relationships — type hierarchy is contextual, not runtime-critical.", SemanticWeight: 0.4, Direction: "directed", Domain: DomainKnowledge, Synthetic: true, }, { Name: EdgeLinksTo, Description: "Markdown cross-document link (R31). Direction: source document/section → target document node. Lowest semantic weight among doc edges — navigation structure, not content relationship.", SemanticWeight: 0.3, Direction: "directed", Domain: DomainDocs, Synthetic: true, }, { Name: EdgeRelatesTo, Description: "Sprint 17 NL-to-graph: generic fallback relationship between knowledge entities when no more specific type applies. Direction: source entity → related entity. Created by Tier 0 heuristic extraction; may be upgraded to a typed edge by Tier 2 LLM classification.", SemanticWeight: 0.3, Direction: "directed", Domain: DomainKnowledge, Synthetic: true, }, { Name: EdgeContains, Description: "Document file contains a section, or parent section contains a subsections (R31). Direction: doc file/section → child section. Structural edge — same intentionally low weight as DEFINES to avoid hub inflation.", SemanticWeight: 0.15, Direction: "directed", Domain: DomainDocs, Synthetic: true, }, { Name: EdgeDefines, Description: "Source file defines a code entity. Direction: file node → entity node. Lowest weight — every entity has exactly one DEFINES edge, so including it at higher weight would uniformly equalise all siblings in a file.", SemanticWeight: 0.15, Direction: "directed", Domain: DomainCode, }, }
EdgeTypeCatalog is the authoritative registry of all edge types in the graph. Every entry in DefaultEdgeWeights must have a corresponding descriptor here — the TestEdgeTypeCatalogCompleteness test enforces this invariant at test time.
Sprint 16 adds: DEPLOYS, CONSUMES, CONFIGURED_BY (code-to-infra/api), DOCUMENTS (docs-to-code), MENTIONS (cross-domain name match). When new edge types are added, append a descriptor here AND add to DefaultEdgeWeights.
// Pool is the package-global StringPool, initialised with NewStringPool.
var Pool = NewStringPool()
Pool is the global instance of the StringPool accessed by FlatGraph.
Functions ¶
func CrossDomainCategory ¶
CrossDomainCategory returns the human-readable category for a cross-domain edge type.
func ExportDOT ¶
ExportDOT serialises nodes and edges as a Graphviz DOT digraph. repoRoot is stripped from file paths for readability; pass "" to skip. includeMeta adds signature metadata to node labels when present.
func ExportGraphML ¶
ExportGraphML serialises nodes and edges as GraphML XML.
func ExportMermaid ¶
ExportMermaid serialises nodes and edges as a Mermaid LR flowchart.
func IntentCarveWeights ¶
IntentCarveWeights returns the pre-allocated edge weight map for the given intent. These maps are package-level vars — zero allocation at call time. Falls back to DefaultEdgeWeights for unknown intents.
func IntentDirectionBoost ¶
IntentDirectionBoost returns the DirectionBoost value for the given intent. Positive = prefer callees, negative = prefer callers, 0 = balanced.
func IsCrossDomainEdge ¶
IsCrossDomainEdge returns true for edge types that connect entities across knowledge domain boundaries (code ↔ infra ↔ api ↔ docs ↔ knowledge ↔ custom). Used by collectCrossDomainImpact for one-hop impact detection.
Note: BFS/PPR cross-domain decay is applied based on node.Domain comparison (currNode.Domain != neighNode.Domain), not on edge type. This function is not called in the BFS/PPR hot path — it classifies edge types for impact analysis.
func NodeTypeToUint8 ¶
NodeTypeToUint8 maps a NodeType to its serialized uint8 value.
Types ¶
type APISurfaceInfo ¶
// APISurfaceInfo describes whether an entity is part of the public API and
// how many external consumers depend on it.
type APISurfaceInfo struct {
// Exported is serialised under the JSON key "exported"; it feeds the
// exported / not-exported distinction used by BreakingRisk below.
Exported bool `json:"exported"`
ExternalPackages int `json:"external_packages"` // number of packages that import this entity's package
BreakingRisk string `json:"breaking_risk"` // "high" (exported, many consumers), "medium" (exported, few), "low" (not exported)
}
APISurfaceInfo describes whether an entity is part of the public API and how many external consumers depend on it.
type CallSite ¶
// CallSite describes one function or method call by its caller (ID and file)
// and its callee (optional package alias plus function name).
// NOTE(review): the callee is held by name only, so presumably it is resolved
// to a node elsewhere — confirm against the resolver.
type CallSite struct {
CallerID NodeID // node ID of the calling function/method
CallerFile string // absolute path of the file containing the caller
PkgAlias string // "" for direct calls; "pkg" for pkg.Func() qualified calls
FuncName string // name of the function/method being called
}
CallSite records an unresolved function call encountered during parsing. The resolver drains these after all files are parsed and creates CALLS edges.
type CarveConfig ¶
// CarveConfig holds the tuning knobs for carving a subgraph around a root
// node: traversal limits (MaxDepth, TokenBudget, MinRelevance), scoring inputs
// (EdgeWeights, DecayFactor, DirectionBoost, CrossDomainDecay,
// LearnedEdgeWeights), output filters (ExcludeTypes, ExcludeTestFiles), the
// traversal engine choice (UsePPR, Alpha), optional post-scoring hooks
// (EmbeddingLookup, HybridLambda, QualityScoreLookup), and cache-key
// discriminators (IntentID, LearnedEdgeWeightsVersion).
type CarveConfig struct {
// MaxDepth is the maximum number of hops from the root node.
MaxDepth int
// TokenBudget caps the approximate output size in tokens (1 token ≈ 4 chars).
TokenBudget int
// EdgeWeights overrides DefaultEdgeWeights. Nil means use defaults.
EdgeWeights map[EdgeType]float64
// DecayFactor is multiplied per hop: relevance = weight × (decay ^ hop).
DecayFactor float64
// MinRelevance drops any node whose relevance score falls below this threshold
// before the token-budget cut is applied. Prevents low-signal siblings and
// package-import nodes from crowding out actual dependencies.
// See DefaultCarveConfig() for tuning guidance (BFS vs PPR interaction).
MinRelevance float64
// ExcludeTypes lists node types to omit from the response. These nodes are
// still traversed during BFS (so edges through them are discovered) but are
// never emitted to the caller. Defaults to {NodePackage, NodeFile} so that
// stdlib imports and file hub-nodes do not waste the token budget.
ExcludeTypes map[NodeType]bool
// ExcludeTestFiles omits nodes whose source file ends in _test.go from the
// output. The nodes are still BFS-traversed (so their edges are discovered)
// but they are never emitted to the caller. Defaults to true so that test
// functions do not crowd the related bucket for well-tested codebases.
ExcludeTestFiles bool
// DirectionBoost applies a relevance multiplier along the CALLS direction.
// Positive: boosts outgoing (callee) edges — token-budget pruner prefers
// what this node calls. Negative: boosts incoming (caller) edges — pruner
// prefers what calls this node. 0 disables directional preference. Default: 0.2.
DirectionBoost float64
// IntentID is an optional cache-key discriminator for intent-specific configs.
// When EdgeWeights are overridden per-intent, set this to the intent string
// (e.g. "modify", "debug") so that intent-specific subgraphs are cached
// separately and do not collide with the default or other intents.
IntentID string
// UsePPR switches the traversal engine from BFS to Personalized PageRank.
// PPR captures multi-path importance: a node reached via N independent call
// chains scores N× higher than a structurally equivalent single-path node.
// BFS max-score heuristic cannot represent this. Default: false (BFS).
// Validated by Sprint 13 #1 spike (diamond 4.69×, wide-fan 5.68× PPR boost).
UsePPR bool
// Alpha is the PPR teleport probability — the chance the random walk jumps
// back to the root (personalized restart) at each step. Higher alpha means
// tighter focus on root with shorter effective reach. Default: 0.15
// (standard PageRank restart rate). Only used when UsePPR=true.
// Values outside (0,1) are clamped to 0.15.
Alpha float64
// EmbeddingLookup batch-fetches pre-normalized float32 embedding vectors for
// a set of node IDs. Called once after BFS/PPR with all scored node IDs.
// IDs with no stored embedding are omitted from the result map. Nil disables
// semantic hybrid scoring (pure structural — backward-compatible default).
EmbeddingLookup func(ids []NodeID) map[NodeID][]float32
// HybridLambda controls the semantic blend weight applied after BFS/PPR:
// finalScore = (1-λ)×structural + λ×cosineSim(embed(root), embed(n))
// Range [0, 1]. 0 = pure structural (default). Ignored when EmbeddingLookup
// is nil or the root node has no stored embedding.
// Recommended production value: 0.3 (70% structural, 30% semantic).
HybridLambda float64
// QualityScoreLookup returns per-entity context quality scores keyed by
// node ID. Scores are the signed sum of signal_weight values from
// outcome_signals (Sprint 15 #1/2). Positive = context was consistently
// helpful; negative = context was repeatedly insufficient or abandoned.
// Called once after BFS/PPR scoring with all surviving nodes.
// Each QualityNode carries the ID, Name, and File so closures can convert
// to entityWithPath format without re-acquiring the graph read lock (which
// would deadlock — CarveEgoGraph already holds g.mu.RLock when calling this).
// Nil disables quality-based re-ranking (backward-compatible default).
QualityScoreLookup func(nodes []QualityNode) map[NodeID]float64
// CrossDomainDecay is a multiplier applied to relevance when BFS/PPR crosses
// a domain boundary (e.g., code→infra, code→api). Range (0, 1].
// A value of 0.5 (default) means cross-domain neighbors score at half the
// relevance of same-domain neighbors at the same structural distance.
// This keeps same-domain code nodes higher in the ranking while still
// surfacing cross-domain context at meaningfully lower relevance.
// 0 disables the domain-boundary penalty (treats all edges equally).
// Values ≥ 1 are clamped to 1.0 (no penalty — backward compatible).
// NOTE(review): the stated range (0, 1] excludes 0, yet 0 is documented as
// "disables the penalty" while 0.5 is called the default. Confirm that the
// zero value of this field really means "no penalty" and that the 0.5
// default is applied only by DefaultCarveConfig.
CrossDomainDecay float64
// LearnedEdgeWeights contains per-specific-edge weight multipliers derived
// from historical task outcomes (Sprint 15 #3). When traversing edge
// (From→To, Type), the base edgeWeight is multiplied by this value.
// A multiplier of 1.0 is neutral; >1.0 boosts the edge; <1.0 penalises it.
// Cap: 2.0x boost, floor: 0.3x penalty. Nil disables learned-weight
// adjustments (backward-compatible default).
LearnedEdgeWeights map[EdgeWeightKey]float64
// LearnedEdgeWeightsVersion is the store's monotonic write counter at the
// time LearnedEdgeWeights was loaded. It is included in the subgraph cache
// key so that cached subgraphs are automatically invalidated after any write
// to the edge_learned_weights table — regardless of whether the map has the
// same number of entries (len-based discrimination is not sufficient).
LearnedEdgeWeightsVersion int64
}
CarveConfig controls how an ego-subgraph is extracted for a query node.
func DefaultCarveConfig ¶
func DefaultCarveConfig() CarveConfig
DefaultCarveConfig returns sensible defaults for context carving.
MinRelevance / PPR interaction:
- BFS path: MinRelevance=0.01 prunes nodes whose relevance has decayed below 1% of root. With decay=0.5 and a 16K-edge graph this allows ~6 hops for narrow chains and ~3 hops for hub nodes (degree-normalized adaptive decay). Raising MinRelevance tightens the subgraph; lowering it risks hub explosion.
- PPR path (UsePPR=true): power iteration assigns near-zero scores to distant nodes naturally — MinRelevance=0.01 trims the long tail without aggressive pruning. The spike benchmark (ppr_spike_test.go) validated this threshold against diamond and wide-fan graph topologies. Lowering it below 0.001 yields negligible recall gain at O(N) cost. Raising it above 0.05 risks losing semantically adjacent nodes in sparse subgraphs.
Recommended tuning guide:
- Default (0.01) — correct for most codebases up to ~50K nodes.
- Dense monorepos (>100K edges): raise to 0.03–0.05 to keep carves fast.
- Sparse/small repos (<1K nodes): lower to 0.005 to improve recall depth.
type CarvedNode ¶
// CarvedNode pairs a graph node with a relevance score and a hop count.
type CarvedNode struct {
// Node is the annotated graph node.
Node *Node `json:"node"`
// Relevance is the score attached to Node (JSON key "relevance").
Relevance float64 `json:"relevance"`
// Hop is the hop count attached to Node (JSON key "hop").
// NOTE(review): presumably the distance from the query root — confirm.
Hop int `json:"hop"`
}
CarvedNode is a node annotated with its relevance score and hop distance from the query root, as computed during a carving traversal.
type CrossDomainContext ¶
// CrossDomainContext holds CarvedNodes in seven named buckets. Every bucket is
// tagged omitempty, so empty buckets are dropped from the JSON output.
//
// NOTE(review): the bucket names appear to mirror the cross-domain edge types
// (DEPLOYS, CONSUMES, CONFIGURED_BY, MENTIONS, MANUAL); which edge type(s)
// feed DocumentedIn is not visible here — confirm against the bucketing code.
type CrossDomainContext struct {
Deploys []CarvedNode `json:"deploys,omitempty"`
Consumes []CarvedNode `json:"consumes,omitempty"`
ConfiguredBy []CarvedNode `json:"configured_by,omitempty"`
DocumentedIn []CarvedNode `json:"documented_in,omitempty"`
Mentions []CarvedNode `json:"mentions,omitempty"`
Manual []CarvedNode `json:"manual,omitempty"`
Related []CarvedNode `json:"related,omitempty"` // multi-hop or no direct edge from root
}
CrossDomainContext groups cross-domain CarvedNodes from a BFS/PPR subgraph by their relationship to the root entity. Used by directionalContext in get_context responses. Each sub-slice preserves BFS Relevance scores for ranking within the sub-bucket.
Nodes connected via a direct edge from/to root are categorized by edge type. Multi-hop cross-domain nodes with no direct root edge go into Related.
func (*CrossDomainContext) IsEmpty ¶
func (c *CrossDomainContext) IsEmpty() bool
IsEmpty returns true when all sub-buckets are empty.
type CrossDomainRef ¶
// CrossDomainRef is an EntityRef annotated with the cross-domain edge type
// that led to it and a human-readable category derived from that edge type.
type CrossDomainRef struct {
// EntityRef is embedded, so its fields are promoted onto CrossDomainRef.
EntityRef
// EdgeType is the cross-domain edge type that led to this entity
// (e.g. "DEPLOYS", "CONSUMES", "CONFIGURED_BY", "DOCUMENTS", "MENTIONS", "MANUAL").
EdgeType EdgeType `json:"edge_type"`
// Category is a human-readable grouping derived from EdgeType:
// "infra" (DEPLOYS), "api" (CONSUMES), "config" (CONFIGURED_BY),
// "docs" (DOCUMENTS), "related" (MENTIONS/MANUAL).
Category string `json:"category"`
}
CrossDomainRef is a single entity reached via a cross-domain edge during impact analysis. Category groups the finding by relationship type so agents can answer "what Terraform resources does this deploy to?" etc.
type DomainType ¶
// DomainType is a string-backed type naming a knowledge domain.
type DomainType string
DomainType classifies which knowledge domain a graph node belongs to. Code is the default domain; future parsers and connectors set other domains so that infrastructure, API, doc, and issue nodes can coexist in the same graph.
const ( // DomainCode is the default: source-code entities (functions, structs, etc.). DomainCode DomainType = "code" // DomainInfra represents infrastructure resources (Terraform, k8s, Docker). DomainInfra DomainType = "infra" // DomainAPI represents API schema entities (OpenAPI endpoints, gRPC services). DomainAPI DomainType = "api" // DomainDocs represents documentation sections (Markdown, wikis). DomainDocs DomainType = "docs" // DomainIssues represents external tickets and issues (GitHub, Linear, Jira). DomainIssues DomainType = "issues" // DomainCustom is a catch-all for user-defined domain parsers and connectors. DomainCustom DomainType = "custom" // DomainKnowledge represents cross-domain or meta-level relationships. // Used by synthetic edges (e.g. MENTIONS) that bridge two existing-domain entities // rather than belonging to any single domain. Sprint 16. DomainKnowledge DomainType = "knowledge" )
type EdgeType ¶
// EdgeType is a string-backed type naming a kind of edge.
type EdgeType string
EdgeType classifies the relationship between two nodes.
const ( EdgeImports EdgeType = "IMPORTS" EdgeCalls EdgeType = "CALLS" EdgeImplements EdgeType = "IMPLEMENTS" EdgeDefines EdgeType = "DEFINES" EdgeEmbeds EdgeType = "EMBEDS" EdgeDependsOn EdgeType = "DEPENDS_ON" EdgeExports EdgeType = "EXPORTS" EdgeDataFlows EdgeType = "DATA_FLOWS" // EdgeHandles is a synthetic edge injected by the heuristic pass (R1). // Direction: routeNode --HANDLES--> handlerFunction. // Represents framework routing registration: "this route dispatches to this handler." // Confidence is stored in the route node's metadata (key "confidence"). EdgeHandles EdgeType = "HANDLES" // R31: Documentation graph edges. // EdgeContains links a document file to its section nodes (doc→section) // and parent sections to child subsections (section→subsection). EdgeContains EdgeType = "CONTAINS" // EdgeExplains links a documentation section to a code entity it describes. // Direction: Section → code entity. Created by ResolveDocEdges post-parse. EdgeExplains EdgeType = "EXPLAINS" // EdgeDocumentedBy is the reverse of EXPLAINS: code entity → Section. // Enables get_context to surface documentation for any queried code entity. EdgeDocumentedBy EdgeType = "DOCUMENTED_BY" // EdgeLinksTo connects document nodes via markdown [text](path.md) links. // Direction: source document/section → target document node. EdgeLinksTo EdgeType = "LINKS_TO" // EdgeManual is a user-defined relationship created via link_entities. // Used when the relation string doesn't match a known catalog type. // BFS weight 0.5 — traversed but lower priority than structural code edges. EdgeManual EdgeType = "MANUAL" // EdgeDeploys links a code entity to the infrastructure resource that deploys it. // Direction: code entity → Terraform/k8s resource. EdgeDeploys EdgeType = "DEPLOYS" // EdgeConsumes links a code entity to the API endpoint or service it calls. // Direction: code entity → OpenAPI endpoint / gRPC service node. 
EdgeConsumes EdgeType = "CONSUMES" // EdgeConfiguredBy links a code entity to the config resource that controls it. // Direction: code entity → config resource (Terraform variable, k8s ConfigMap, etc.). EdgeConfiguredBy EdgeType = "CONFIGURED_BY" // EdgeDocuments links a documentation section to the code entity it describes. // Direction: docs section → code entity. Broader than EXPLAINS — used for // cross-domain docs (e.g. a README section about a Terraform module). EdgeDocuments EdgeType = "DOCUMENTS" // EdgeMentions is a synthetic cross-domain name-match edge. // Direction: any entity → any entity (cross-domain). Created by the name-matching // background pass (Sprint 16 #2) when two entities share the same name across domains. // Confidence 0.0–1.0 stored in edge metadata; only edges with confidence ≥ 0.6 are // auto-created. BFS weight is lower than structural edges to reflect uncertainty. EdgeMentions EdgeType = "MENTIONS" // EdgeContradicts links two entities that express conflicting information // (e.g. two doc sections making incompatible claims about a system). // Direction: newer/conflicting entity → established entity. EdgeContradicts EdgeType = "CONTRADICTS" // EdgeCausedBy links an effect entity to its cause // (e.g. "OutOfMemoryError" caused_by "LeakedConnection"). // Direction: effect → cause. EdgeCausedBy EdgeType = "CAUSED_BY" // EdgeInstanceOf links a specific entity to its general type or category // (e.g. "Redis" instance_of "CacheSystem"). // Direction: specific → general. EdgeInstanceOf EdgeType = "INSTANCE_OF" // EdgeRelatesTo is the generic fallback relationship for NL-extracted edges // where no more specific type applies. Used when Tier 2 classification is // unavailable or returns an unrecognized type. // Direction: source entity → related entity. EdgeRelatesTo EdgeType = "RELATES_TO" )
Edge type constants: classify the relationship between two graph nodes.
type EdgeTypeDescriptor ¶
// EdgeTypeDescriptor describes a single edge type: its name, a human-readable
// description, its default BFS weight, its direction, the knowledge domain it
// belongs to, and whether it is synthetic.
type EdgeTypeDescriptor struct {
// Name is the EdgeType constant value (e.g. "CALLS").
Name EdgeType `json:"name"`
// Description is a human-readable explanation of what this edge means.
Description string `json:"description"`
// SemanticWeight is the default BFS traversal weight (matches DefaultEdgeWeights).
// A higher weight means the edge is traversed earlier and contributes more
// relevance to the nodes reachable through it.
SemanticWeight float64 `json:"semantic_weight"`
// Direction is always "directed" for the current graph model.
// The field is reserved for future bidirectional edge types
// (e.g. cross-domain MENTIONS).
Direction string `json:"direction"`
// Domain classifies which knowledge domain this edge belongs to.
// Uses the same values as DomainType constants: DomainCode, DomainDocs,
// DomainInfra, DomainAPI, DomainKnowledge, DomainIssues, DomainCustom.
// Sprint 16 added infra, api, and knowledge domain edges (DEPLOYS, CONSUMES,
// CONFIGURED_BY, DOCUMENTS, MENTIONS).
Domain DomainType `json:"domain"`
// Synthetic marks edges injected by heuristic passes rather than derived from the AST.
// Synthetic edges carry an inherent confidence < 1.0 (stored in node metadata).
// The field is omitted from JSON when false.
Synthetic bool `json:"synthetic,omitempty"`
}
EdgeTypeDescriptor captures the semantic metadata for a single edge type. The catalog is the authoritative source for BFS weights, domain tags, and human-readable descriptions — avoiding the need to scatter this information across multiple maps and comments throughout the codebase.
func GetEdgeTypes ¶
func GetEdgeTypes() []EdgeTypeDescriptor
GetEdgeTypes returns a copy of the EdgeTypeCatalog slice. Callers may safely range over or index the result without mutating shared state.
type EdgeWeightKey ¶
EdgeWeightKey uniquely identifies a specific directed edge in the graph. Used as a map key for per-edge learned weight multipliers (Sprint 15 #3).
type EntityInfo ¶
EntityInfo extends EntityRef with connectivity metrics.
type EntityRef ¶
// EntityRef is a minimal, JSON-serialisable reference to a graph node.
type EntityRef struct {
// ID is the NodeID of the referenced node.
ID NodeID `json:"id"`
// Name, Type, File and Line presumably mirror the same-named fields of the
// referenced Node — TODO confirm against the code that builds EntityRef values.
Name string `json:"name"`
Type NodeType `json:"type"`
File string `json:"file"`
Line int `json:"line"`
}
EntityRef is a minimal reference to a node, used for lists like entry points.
type ErrNodeNotFound ¶
type ErrNodeNotFound NodeID
ErrNodeNotFound is returned when a query targets a non-existent node.
func (ErrNodeNotFound) Error ¶
func (e ErrNodeNotFound) Error() string
type FlatGraph ¶
// FlatGraph stores the graph as parallel slices (Struct-of-Arrays). The
// per-node slices (Names, Types, FileIDs, NamespaceIDs, Tombstones) are
// presumably all indexed by NodeIndex — confirm against AddNode. Adjacency is
// stored as offset-indexed edge arrays, once for outgoing and once for
// incoming edges.
type FlatGraph struct {
// RepoID presumably identifies the repository this graph was built from — TODO confirm.
RepoID string
// Names holds compact IDs into the String Interning Pool.
Names []StringID
// Types holds the NodeType enum directly.
Types []NodeType
// FileIDs maps each node to the source file string ID.
FileIDs []StringID
// NamespaceIDs enables multi-monolith cross-linking without changing core logic.
NamespaceIDs []uint16
// Tombstones is a bitset-like array: a true entry means the corresponding
// node was deleted during an incremental file parse. Array compaction
// happens in the background.
Tombstones []bool
// TombstoneCount tracks how many nodes are deleted. When >15%, compaction is triggered.
TombstoneCount int
// OutEdges stores the destination NodeIndex of all outgoing edges contiguously.
OutEdges []NodeIndex
// OutWeights stores the Semantic EdgeWeight for each corresponding edge in OutEdges.
OutWeights []float32
// OutOffsets denotes the start and end of each node's edges in the OutEdges slice.
// Node `i`'s edges are in OutEdges[OutOffsets[i] : OutOffsets[i+1]]
OutOffsets []uint64
// InEdges (Incoming edges) built identically to OutEdges for reverse lookups.
// InWeights and InOffsets are the incoming-edge counterparts of OutWeights
// and OutOffsets.
InEdges []NodeIndex
InWeights []float32
InOffsets []uint64
// contains filtered or unexported fields
}
FlatGraph is the V2 "Deterministic Core" engine. It uses a Struct-of-Arrays (SoA) layout instead of pointer-heavy maps. This keeps node and edge data in contiguous memory, maximizing CPU cache locality for BFS traversals and preventing GC pauses during million-node loads.
func Deserialize ¶
Deserialize reads the zstd BLOB and reconstructs the global FlatGraph.
func NewFlatGraph ¶
NewFlatGraph initializes an empty SoA Graph structure.
func (*FlatGraph) AddNode ¶
func (fg *FlatGraph) AddNode(name StringID, nodeType NodeType, fileID StringID, nsID uint16) NodeIndex
AddNode appends a new node into the SoA structure.
func (*FlatGraph) BulkAddEdges ¶
BulkAddEdges rebuilds CSR arrays from scratch given a sorted list of edges. This is O(E + N) instead of O(E * N) for AddEdge called E times.
func (*FlatGraph) ExtID ¶
ExtID maps our compact NodeIndex back to the stable string-based NodeID required by the MCP protocol communication.
func (*FlatGraph) LookupIndex ¶
LookupIndex returns the NodeIndex for a NodeID, or (0, false) if not found.
type Graph ¶
// Graph is the core in-memory code graph. It exposes no exported fields;
// all access goes through its methods.
type Graph struct {
// contains filtered or unexported fields
}
Graph is the core in-memory code graph. It is safe for concurrent reads and writes — a RWMutex serialises mutations while allowing parallel queries.
func (*Graph) AddCallSite ¶
AddCallSite records an unresolved call site for post-parse resolution.
func (*Graph) AddEdge ¶
AddEdge inserts a directed edge. Both endpoint nodes must already exist; if either is absent the edge is silently dropped to avoid dangling refs. Duplicate edges (same From, To, Type) are silently dropped so that repeated calls from incremental reindex or heuristic passes are idempotent.
func (*Graph) AddImportAlias ¶
AddImportAlias records that file uses alias as the identifier for importPath. Called by Go parser when an explicit import alias is present (e.g., `import alias "github.com/foo/bar"`).
func (*Graph) AddInstantiatedType ¶
AddInstantiatedType records that typeName is explicitly constructed in file. Called by language parsers when they encounter constructor expressions (Java: new Foo(), TypeScript: new Foo()). Used by the resolver for RTA-style call graph refinement.
func (*Graph) AddNode ¶
AddNode inserts or replaces a node. If a node with the same ID already exists it is overwritten — the caller is responsible for deduplication. A stable UUID is generated for n.StableID if it is empty.
func (*Graph) AddTerraformRef ¶
func (g *Graph) AddTerraformRef(ref TerraformRef)
AddTerraformRef records an unresolved Terraform resource reference for post-parse cross-file DEPENDS_ON resolution.
func (*Graph) AddVarType ¶
AddVarType records that variable varName in file has type typeName. Called by language parsers during AST traversal to enable cross-file obj.method() resolution in the post-parse resolver pass.
func (*Graph) AllEdges ¶
AllEdges returns a snapshot of every edge in the graph, sorted by From, To, Type.
func (*Graph) BulkAddCallSites ¶
BulkAddCallSites appends multiple call sites in a single lock acquisition. Used by the watcher to re-register stored call sites from other files before a resolver pass so that ResolveCallEdges can recreate CALLS edges pointing into a file that was just re-parsed (those edges were deleted by RemoveFile).
func (*Graph) CarveEgoGraph ¶
func (g *Graph) CarveEgoGraph(rootID NodeID, cfg CarveConfig) (*SubGraph, error)
CarveEgoGraph extracts a relevance-ranked subgraph centred on the given root node.
When cfg.UsePPR is false (default), the algorithm is:
1. BFS outward from root, up to cfg.MaxDepth hops.
2. Each node is assigned a relevance score: relevance = edgeTypeWeight(edge) × (cfg.DecayFactor ^ hopCount)
3. When a node is reachable via multiple paths the maximum score is kept.
4. If the estimated token cost exceeds cfg.TokenBudget, the lowest-scored nodes are pruned (highest-hop, lowest-weight first).
5. Only edges where both endpoints survived pruning are included.
When cfg.UsePPR is true, steps 1–3 are replaced by Personalized PageRank (see pprScores). Steps 4–5 are unchanged. PPR captures multi-path importance that the BFS max-score heuristic cannot represent.
func (*Graph) ClearFileSnapshot ¶
ClearFileSnapshot removes the stable ID snapshot for a file once migration is complete. Optional — snapshots are small and automatically replaced on the next SnapshotFileStableIDs call for the same file.
func (*Graph) Compact ¶
func (g *Graph) Compact()
Compact recreates the internal maps from scratch, allowing the Go runtime to release memory from deleted map buckets. Go maps do not shrink after deletions, so after thousands of incremental re-parses memory trends upward. Call this periodically (e.g. after a full reindex) to reclaim that memory.
func (*Graph) CrossDomainImpactForNode ¶
func (g *Graph) CrossDomainImpactForNode(nodeID NodeID) ([]CrossDomainRef, bool)
CrossDomainImpactForNode returns the cross-domain entities directly reachable from nodeID via cross-domain edges. It is the public entry point used by the struct/interface aggregation path in handleGetImpact. Returns the refs and a truncated flag (true when capped at maxCrossDomainImpactNodes).
func (*Graph) CrossRepoCalls ¶
CrossRepoCalls returns statistics about cross-repository CALLS edges. It iterates the internal edge map directly without allocating a snapshot slice. The returned linkedRepos slice is sorted and excludes primaryRepoID.
func (*Graph) DirectNeighbors ¶
DirectNeighbors returns deduplicated NodeIDs that are 1-hop away from id via any edge direction. Uses the FlatGraph fast path when available, otherwise falls back to OutEdges + InEdges. Returns nil if id is unknown.
func (*Graph) DrainCallSites ¶
DrainCallSites returns all pending call sites and clears the internal list. Called by the resolver after all files have been parsed.
func (*Graph) DrainTerraformRefs ¶
func (g *Graph) DrainTerraformRefs() []TerraformRef
DrainTerraformRefs returns all pending Terraform refs and clears the list. Must be called after all .tf files have been parsed.
func (*Graph) EdgeCountsByType ¶
EdgeCountsByType returns the number of edges per edge type.
func (*Graph) EdgesForFile ¶
EdgesForFile returns all edges where at least one endpoint belongs to the given file: outgoing edges from file nodes and incoming edges to file nodes. Self-edges within the same file appear exactly once. Complexity is O(total_nodes + file_edges) — significantly cheaper than AllEdges() + filter (O(E)) for per-file violation and analysis passes.
func (*Graph) EnableFlatGraph ¶
func (g *Graph) EnableFlatGraph()
EnableFlatGraph builds the FlatGraph from the current graph state and stores it for use as the PPR BFS fast path. Safe to call at any time; rebuilds atomically. The heavy construction runs outside g.mu; only the final pointer swap requires a write lock. Idempotent — calling multiple times is safe.
func (*Graph) FindByFile ¶
FindByFile returns all nodes whose File field matches the given path. The match is suffix-based so callers may pass either a full absolute path or a relative path such as "internal/graph/graph.go"; both resolve correctly against the absolute paths that the parser stores on each node.
Uses the secondary index for O(1) lookup when available; falls back to O(N) scan during initial parsing when the index is not yet ready.
func (*Graph) FindByName ¶
FindByName returns all nodes whose Name field matches the given string (case-insensitive). Also matches qualified names: searching "Close" will match a node named "Store.Close" (suffix after the last dot). An empty slice is returned if nothing matches.
Uses the secondary index for O(1) lookup when available; falls back to O(N) scan during initial parsing when the index is not yet ready.
func (*Graph) FindByPattern ¶
FindByPattern returns all nodes whose Name contains the given substring (case-insensitive). Useful for fuzzy "find entity" queries. On large graphs (100K+ nodes), consider FindByPatternLimit to cap the scan.
func (*Graph) FindByPatternLimit ¶
FindByPatternLimit is like FindByPattern but stops scanning after limit matches are found (0 = unlimited). This prevents O(N) full scans on hot paths where only a few results are needed. Results are sorted by ID for deterministic output.
func (*Graph) FindByType ¶
FindByType returns all nodes of the given NodeType.
func (*Graph) FindTestsFor ¶
FindTestsFor returns the files of test nodes that call into the given node, found via reverse-BFS over CALLS edges limited to test files. The result is a deduplicated sorted list of test file paths. Returns an empty slice when no test coverage is found.
func (*Graph) FindTestsWithDistance ¶
FindTestsWithDistance is like FindTestsFor but returns distance-scored results. Tests at distance 1 (directly call the entity) are "critical" — most likely to break. Distance 2 is "likely", distance 3+ is "peripheral".
func (*Graph) GetImportAliases ¶
GetImportAliases returns the alias → importPath map for the given file. Returns nil if no explicit import aliases were recorded for the file.
func (*Graph) GetInstantiatedTypes ¶
GetInstantiatedTypes returns the union of all instantiated type names across all files. Returns nil if no instantiation data was recorded (e.g. pure Go projects where constructor tracking is not implemented).
func (*Graph) GetVarTypes ¶
GetVarTypes returns the variable → type map for the given file. Returns nil if no type annotations were recorded for the file.
func (*Graph) HasEdge ¶
HasEdge reports whether an edge (from, to, edgeType) exists in the graph. O(1).
func (*Graph) ImpactAnalysis ¶
func (g *Graph) ImpactAnalysis(rootID NodeID, maxDepth int) (*ImpactResult, error)
ImpactAnalysis performs a reverse BFS from rootID following incoming CALLS and IMPLEMENTS edges to find all nodes that could be affected if rootID changes. Results are grouped into depth tiers: direct (depth 1), indirect (depth 2), peripheral (depth 3+). maxDepth caps the traversal (0 uses default of 3).
func (*Graph) InEdgesForFile ¶
InEdgesForFile returns all incoming edges to nodes whose File matches the given path. Complexity is O(total_nodes + file_in_edges). Paired with OutEdgesForFile to get all edges that touch a file without a full AllEdges() scan — used by CheckViolationsForFile for O(file_edges) violation checks.
func (*Graph) Index ¶
func (g *Graph) Index() *GraphIndex
Index returns the current columnar GraphIndex, or nil if it has not been built yet. Callers should check Index().Ready() before using it.
func (*Graph) InvalidateCache ¶
func (g *Graph) InvalidateCache()
InvalidateCache discards all cached subgraph results. Call this after any batch of graph mutations (e.g. after the watcher re-parses a file) so that subsequent get_context calls see fresh data.
func (*Graph) InvalidateCacheForFile ¶
InvalidateCacheForFile evicts only cached subgraphs that reference the given file. Entries for unrelated entities survive, dramatically improving cache hit rates when a single file changes. Prefer this over InvalidateCache when you know which file was modified.
func (*Graph) MakeNodeID ¶
MakeNodeID constructs a canonical NodeID from its components. Format: "repoID::file::name"
When the graph has a repo root set, file paths are stored as project-relative paths (e.g. "cmd/synapses/main.go" instead of "/Users/you/.../main.go"). This significantly reduces token consumption in MCP responses. Node.File retains the absolute path for internal operations like RemoveFile.
func (*Graph) MergeFrom ¶
MergeFrom copies all nodes, edges, varTypes, and instantiatedTypes from other into g. Existing nodes in g are never overwritten — other's data is purely additive. This is used at startup to merge federated (linked) project graphs and by the watcher to merge parallel-parsed temp graphs back into the main graph.
func (*Graph) MigrateStableID ¶
MigrateStableID attempts to reuse a stable UUID from a previous snapshot for the given node. It checks snapshots for the node's file in two tiers:
- Tier 1: exact (name, pkg, signature) match → certain same entity
- Tier 2: same (pkg, signature) with different name → likely rename
If no match is found, the node's current StableID is left unchanged. Must be called AFTER re-parsing and AddNode, but before SnapshotFileStableIDs is called again for the same file.
func (*Graph) NodeCountsByDomain ¶
func (g *Graph) NodeCountsByDomain() map[DomainType]int
NodeCountsByDomain returns a map of domain → node count for all nodes in the graph. Empty domain (code entities parsed before Sprint 16) is counted under DomainCode. This is O(N) under one read lock and does not copy node pointers — use it in hot paths like session_init instead of AllNodes().
func (*Graph) NodesForFile ¶
NodesForFile returns all nodes whose source file matches the given path. Used by the watcher to migrate stable IDs after a re-parse.
func (*Graph) OutEdgesForFile ¶
OutEdgesForFile returns all outgoing edges from nodes whose File matches the given path. Complexity is O(total_nodes + file_out_edges), which is significantly cheaper than AllEdges() + filter when only one file changed.
func (*Graph) PeekCallSites ¶
PeekCallSites returns a copy of all pending call sites without clearing them. Used to persist call sites to the store before the resolver drains them.
func (*Graph) ProjectIdentity ¶
func (g *Graph) ProjectIdentity() *ProjectIdentity
ProjectIdentity computes a compact architectural summary of the graph. This is the payload returned by the get_project_identity MCP tool. Results are cached for 30 seconds and invalidated on graph mutations.
func (*Graph) RebuildIndex ¶
RebuildIndex builds a fresh columnar GraphIndex from the current map state and atomically replaces g.index. Only one rebuild runs at a time. It returns the zstd-compressed snapshot bytes for the caller to persist to the store for fast warm-boot loading. Returns nil bytes on serialisation error. Typical usage:
go func() {
blob, err := g.RebuildIndex()
if err == nil { st.SaveIndexSnapshot(blob) }
}()
func (*Graph) RemoveCallSitesForFile ¶
RemoveCallSitesForFile removes any pending call sites whose CallerFile matches the given path. Called by the watcher before re-parsing a changed file so that stale call sites from the old version are not mixed with the newly parsed ones.
func (*Graph) RemoveEdge ¶
RemoveEdge removes a single directed edge. No-op if the edge does not exist. O(outDegree + inDegree) due to slice filtering — acceptable for manual edge removals which are rare and not on any hot path.
func (*Graph) RemoveFile ¶
RemoveFile removes all nodes and their associated edges for a given file path. Used by the file watcher to prune stale data before re-parsing.
func (*Graph) RemoveTerraformRefsForFile ¶
RemoveTerraformRefsForFile removes pending Terraform refs whose FromFile matches the given path. Called by the watcher before re-parsing a .tf file.
func (*Graph) Root ¶
Root returns the absolute filesystem path of the repository root. It is empty if the graph was loaded from a store that predates this field.
func (*Graph) SetFileProvenance ¶
func (g *Graph) SetFileProvenance(filePath string, p ProvenanceType)
SetFileProvenance sets the Provenance field on all nodes whose File matches filePath. Runs under a write lock to avoid the data race that would occur if callers mutated node pointers returned by FindByFile after releasing the lock.
func (*Graph) SetIndex ¶
func (g *Graph) SetIndex(idx *GraphIndex)
SetIndex atomically replaces the graph's columnar index with the provided one. Used during warm-boot to install a snapshot-loaded index without a full rebuild. Also sets g.pool to idx.Pool so subsequent RebuildIndex calls share the same pool.
func (*Graph) SnapshotCallsAdjacency ¶
SnapshotCallsAdjacency returns a snapshot of CALLS outgoing edges for all nodes.
func (*Graph) SnapshotEdgesAndNodes ¶
SnapshotEdgesAndNodes returns, under a single RLock, a complete snapshot of all outgoing edges by source node and all nodes by ID. This allows callers to perform graph traversal without per-node lock acquisitions.
func (*Graph) SnapshotFileStableIDs ¶
SnapshotFileStableIDs records the stable UUIDs of all nodes in the given file so that MigrateStableID can reuse them after the file is re-parsed. Must be called BEFORE RemoveFile for the migration to work correctly.
func (*Graph) SnapshotImportAdjacency ¶
SnapshotImportAdjacency returns, under a single RLock, a map of fileNodeID → []packageNodeID for all IMPORTS edges, plus a flat node map. This allows callers to build import lookup tables without per-node locking.
func (*Graph) SnapshotImportAliases ¶
SnapshotImportAliases returns a snapshot of all per-file explicit import alias mappings. The outer map is file path → alias → importPath.
func (*Graph) SuggestRules ¶
func (g *Graph) SuggestRules() []SuggestedRule
SuggestRules detects high-density CALLS coupling between directory groups. It groups CALLS edges by their source and target directories, then surfaces directory pairs where ≥85% of nodes in the from-dir call into the to-dir (minimum 3 samples) as suggested architectural rules.
Must be called under g.mu.Lock() — used by ProjectIdentity() which already holds the write lock. Do NOT call g.mu.Lock() inside this method.
Returns up to 5 suggestions ordered by confidence descending.
func (*Graph) ToFlatGraph ¶
ToFlatGraph converts the pointer-based Graph to a FlatGraph (SoA layout) for cache-friendly BFS traversal. The result is a snapshot — mutations to the original Graph are not reflected. Caller must hold no lock; this method acquires g.mu.RLock internally.
func (*Graph) UpdateFileNodeMetadata ¶
UpdateFileNodeMetadata updates metadata on all nodes belonging to the given file.
func (*Graph) UpdateNodeMetadata ¶
UpdateNodeMetadata calls update(n) for every node whose File matches absFile, holding the graph write lock for the duration. The callback may safely read and write n.Metadata — no structural graph changes (add/remove nodes or edges) should be made inside update.
This is the correct way to write node metadata from a background goroutine while the MCP server is live: git I/O should happen before calling this method; the write lock is held only for the in-memory metadata writes (typically microseconds). UpdateNodeMetadata applies update to the node with the given ID under the graph write lock. Safe for concurrent use.
func (*Graph) UpsertRouteNode ¶
UpsertRouteNode atomically inserts a synthetic route node if one with the same ID does not already exist. Returns true if the node was newly created. This is the safe alternative to the non-atomic GetNode+AddNode pattern: by holding the write lock for both the existence check and the insert, two concurrent incremental-reindex goroutines cannot both create the same route with different StableIDs.
type GraphIndex ¶
// GraphIndex is a columnar view of the graph: node properties live in
// parallel slices indexed by a sequential uint32 "seq" ID, and adjacency is
// stored as CSR arrays for both outgoing and incoming edges.
type GraphIndex struct {
// Pool is the shared string interning pool (from intern.go).
// All string columns below store StringID values.
Pool *StringPool
// Parallel node property slices — all indexed by sequential uint32 seq ID.
// seq 0 is reserved as the "null / not found" sentinel.
SeqIDs []NodeID // seq → original NodeID string
Types []StringID // seq → interned NodeType string
Names []StringID // seq → interned Name string
FileIDs []StringID // seq → interned File path string
PkgIDs []StringID // seq → interned Package string
Lines []int32 // seq → line number
Exported []bool // seq → exported flag
Tombstone []bool // seq → true means node is deleted (pending compaction)
// IDToSeq maps NodeID strings → seq for O(1) lookup in the BFS hot path.
IDToSeq map[NodeID]uint32
// CSR adjacency lists for outgoing edges.
// Node with seq i has outgoing edges in OutTargets[OutStart[i]:OutEnd[i]];
// OutTypes[k] is the edge type of the edge whose target is OutTargets[k].
OutStart []uint32 // len = node count + 2 (1-indexed, sentinel at 0)
OutEnd []uint32 // len = node count + 2
OutTargets []uint32 // flattened target seq IDs
OutTypes []StringID // flattened edge type string IDs (parallel to OutTargets)
// CSR adjacency lists for incoming edges (same layout, reversed direction):
// InTargets holds the source seq IDs and InTypes the parallel edge types.
InStart []uint32
InEnd []uint32
InTargets []uint32
InTypes []StringID
// TombstoneCount tracks how many nodes are tombstoned.
// If TombstoneCount/len(SeqIDs) > 0.15, the background compactor triggers.
TombstoneCount int32 // atomic
// EigenvectorCentrality stores the normalized (0–1) eigenvector centrality
// for each node (1-indexed; position 0 is the sentinel, always 0.0).
// Computed once during buildIndex() / LoadSnapshot() via power iteration on
// the undirected adjacency. Architecturally important nodes (connected to
// other important nodes) get values close to 1.0; leaf/isolated nodes get 0.0.
// Applied in CarveEgoGraph as: relevance × (1 + centralityBeta × centrality).
EigenvectorCentrality []float64
// contains filtered or unexported fields
}
GraphIndex is a read-optimised, cache-friendly columnar view of the graph. It is rebuilt atomically after each parse cycle and never mutated in place — only tombstoned via MarkTombstone / replaced wholesale via RebuildIndex.
func LoadSnapshot ¶
func LoadSnapshot(data []byte, pool *StringPool) (*GraphIndex, error)
LoadSnapshot deserialises a zstd-compressed byte slice produced by SaveSnapshot and returns a ready GraphIndex. The provided pool is reused so strings already interned during a previous session share the same memory.
func (*GraphIndex) InNeighbours ¶
func (idx *GraphIndex) InNeighbours(seq uint32) (sources []uint32, types []StringID)
InNeighbours returns the incoming (source seq, edge type StringID) values for node seq, as two parallel slices.
func (*GraphIndex) IsTombstoned ¶
func (idx *GraphIndex) IsTombstoned(seq uint32) bool
IsTombstoned returns true if the node at seq has been logically deleted.
func (*GraphIndex) MarkTombstone ¶
func (idx *GraphIndex) MarkTombstone(seq uint32)
MarkTombstone logically deletes node seq (e.g. when its source file is edited). The node remains in the slice arrays until the next compaction sweep.
func (*GraphIndex) NodeFile ¶
func (idx *GraphIndex) NodeFile(seq uint32) string
NodeFile returns the interned File path string for seq.
func (*GraphIndex) NodeName ¶
func (idx *GraphIndex) NodeName(seq uint32) string
NodeName returns the interned Name string for seq.
func (*GraphIndex) OutNeighbours ¶
func (idx *GraphIndex) OutNeighbours(seq uint32) (targets []uint32, types []StringID)
OutNeighbours returns the slice of outgoing (target seq, edge type StringID) values for node seq. The returned slices are direct subslices of internal arrays — callers must not modify them.
func (*GraphIndex) Ready ¶
func (idx *GraphIndex) Ready() bool
Ready returns true if the index has been built and is safe for BFS reads.
func (*GraphIndex) ReceiverMethodSeqs ¶
func (idx *GraphIndex) ReceiverMethodSeqs(receiverName string) []uint32
ReceiverMethodSeqs returns seq IDs of methods whose receiver matches the given name (case-insensitive). Used by CarveEgoGraph to seed BFS with struct/interface methods without scanning all nodes. The caller MUST already hold g.mu.RLock — this method does no locking.
func (*GraphIndex) SaveSnapshot ¶
func (idx *GraphIndex) SaveSnapshot() ([]byte, error)
SaveSnapshot serialises idx to a zstd-compressed byte slice. The caller is responsible for persisting the bytes (e.g. in the SQLite meta table).
func (*GraphIndex) Seq ¶
func (idx *GraphIndex) Seq(nid NodeID) uint32
Seq returns the sequential uint32 ID for nid, or 0 (sentinel) if not found.
func (*GraphIndex) TombstoneRatio ¶
func (idx *GraphIndex) TombstoneRatio() float64
TombstoneRatio returns the fraction of nodes that are tombstoned. Used by the background compactor to decide whether to trigger a rebuild.
func (*GraphIndex) UnsafeInNeighbours ¶
func (idx *GraphIndex) UnsafeInNeighbours(seq uint32) (sources []uint32, types []StringID)
UnsafeInNeighbours returns incoming neighbours without acquiring the RLock. Same safety requirements as UnsafeSeq.
func (*GraphIndex) UnsafeIsTombstoned ¶
func (idx *GraphIndex) UnsafeIsTombstoned(seq uint32) bool
UnsafeIsTombstoned checks the tombstone flag without acquiring the RLock. Same safety requirements as UnsafeSeq.
func (*GraphIndex) UnsafeOutNeighbours ¶
func (idx *GraphIndex) UnsafeOutNeighbours(seq uint32) (targets []uint32, types []StringID)
UnsafeOutNeighbours returns outgoing neighbours without acquiring the RLock. Same safety requirements as UnsafeSeq.
func (*GraphIndex) UnsafeSeq ¶
func (idx *GraphIndex) UnsafeSeq(nid NodeID) uint32
UnsafeSeq returns the sequential ID for nid without acquiring the RLock. The caller MUST guarantee that the index is immutable (ready == 1) and hold g.mu.RLock to prevent concurrent MarkTombstone writes.
type GraphSummary ¶
// GraphSummary contains aggregate counts across the whole graph.
// Files through Interfaces presumably count the nodes of the corresponding
// NodeType, and Edges the total number of edges — TODO confirm against the
// code that populates this struct.
type GraphSummary struct {
Files int `json:"files"`
Packages int `json:"packages"`
Functions int `json:"functions"`
Methods int `json:"methods"`
Structs int `json:"structs"`
Interfaces int `json:"interfaces"`
Edges int `json:"edges"`
}
GraphSummary contains aggregate counts across the whole graph.
type ImpactResult ¶
// ImpactResult is the result of an impact (blast-radius) analysis rooted at a
// single entity.
type ImpactResult struct {
// Root references the entity the analysis was run for.
Root EntityRef `json:"root"`
// Tiers groups the affected nodes by depth (see ImpactTier).
Tiers []ImpactTier `json:"tiers"`
// TotalAffected counts the code-caller nodes across the tiers; cross-domain
// entities are counted separately in CrossDomainAffected.
TotalAffected int `json:"total_affected"`
// AffectedFiles presumably lists the files containing the affected nodes —
// TODO confirm ordering and de-duplication against ImpactAnalysis.
AffectedFiles []string `json:"affected_files"`
// Truncated is true when any tier was capped at maxImpactNodesPerTier.
// Check per-tier Truncated + TotalNodes for exact counts.
Truncated bool `json:"truncated,omitempty"`
// TestCoverage lists test files that exercise the root entity (R2).
// Populated by FindTestsFor via reverse-BFS over CALLS edges filtered to test files.
TestCoverage []string `json:"test_coverage,omitempty"`
// APISurface flags whether the root entity is part of the public API surface
// and lists external consumers. Populated when root is exported.
APISurface *APISurfaceInfo `json:"api_surface,omitempty"`
// TestPriority provides distance-scored test files: "critical" tests (distance 1)
// are most likely to break, "likely" (distance 2) somewhat likely, "peripheral" (3+)
// less likely. Sorted by distance ascending.
TestPriority []TestRef `json:"test_priority,omitempty"`
// ImplementorImpact lists types that implement the root interface/type.
// When an interface method signature changes, all implementors must update.
// Populated when root is an interface node or method on an interface.
ImplementorImpact []EntityRef `json:"implementor_impact,omitempty"`
// CrossDomainImpact lists entities in other knowledge domains that are
// directly connected to the root via cross-domain edges (DEPLOYS, CONSUMES,
// CONFIGURED_BY, DOCUMENTS, MENTIONS, MANUAL). Only edges with confidence ≥ 0.6
// or confirmed are included — this is enforced at edge-injection time so all
// edges present in the in-memory graph already satisfy the threshold.
// Sprint 16 #5: the killer feature — "what infra/API/docs does this touch?"
CrossDomainImpact []CrossDomainRef `json:"cross_domain_impact,omitempty"`
// CrossDomainAffected is the count of cross-domain entities in CrossDomainImpact.
// Kept separate from TotalAffected (which counts code-caller tier nodes) so
// callers can distinguish code blast-radius from cross-domain blast-radius.
CrossDomainAffected int `json:"cross_domain_affected,omitempty"`
// CrossDomainTruncated is true when CrossDomainImpact was capped at
// maxCrossDomainImpactNodes (100). The full count is not available.
CrossDomainTruncated bool `json:"cross_domain_truncated,omitempty"`
}
ImpactResult is returned by ImpactAnalysis.
type ImpactTier ¶
// ImpactTier holds the nodes found at one hop distance from the analysis root.
type ImpactTier struct {
// Depth is the hop distance of this tier.
Depth int `json:"depth"`
Label string `json:"label"` // "direct" | "indirect" | "peripheral"
// The three confidence values presumably correspond, in order, to the three
// labels above — TODO confirm.
Confidence float64 `json:"confidence"` // 1.0 / 0.6 / 0.3
Nodes []EntityRef `json:"nodes"`
Truncated bool `json:"truncated,omitempty"` // true when nodes were capped
TotalNodes int `json:"total_nodes,omitempty"` // actual count before cap
}
ImpactTier groups nodes at the same blast-radius hop distance.
type Node ¶
// Node is a single entity stored in the graph, together with its location,
// free-form metadata, stable identity, provenance and knowledge domain.
type Node struct {
// ID is the node's identifier within the graph (see NodeID).
ID NodeID `json:"id"`
// Type classifies what kind of entity this node is (see NodeType).
Type NodeType `json:"type"`
// Name is the entity's name.
Name string `json:"name"`
// Package is presumably the package the entity belongs to — TODO confirm.
Package string `json:"package"`
// File is the source file the node came from.
File string `json:"file"`
// Line is presumably the line at which the entity is declared — TODO confirm.
Line int `json:"line"`
// Exported is presumably true when the entity is visible outside its
// package — TODO confirm how non-Go parsers set it.
Exported bool `json:"exported"`
// Metadata holds free-form string key/value pairs attached to the node.
Metadata map[string]string `json:"metadata,omitempty"`
// StableID is a UUID v4 assigned on first creation and preserved across
// file renames and incremental re-parses. It provides a stable cross-project
// reference that does not change when a file is moved. Generated by
// Graph.AddNode if empty; migrated by Watcher.reparseFile via MigrateStableID.
StableID string `json:"stable_id,omitempty"`
// Provenance classifies the trust tier of this node's source file.
// Derived at index time; defaults to ProvenanceUserAuthored ("").
// Used by BFS ranking (user-authored nodes surface first) and as a
// Semantic Firewall gate on high-privilege operations.
Provenance ProvenanceType `json:"provenance,omitempty"`
// Domain classifies which knowledge domain this node belongs to.
// Defaults to DomainCode ("code") for all source-code entities.
// Future domain parsers (infra, api, docs, issues) set this at index time
// so that non-code nodes coexist in the same graph without ambiguity.
// An empty string is treated as DomainCode everywhere in the codebase.
Domain DomainType `json:"domain,omitempty"`
}
Node represents a single code entity in the graph.
type NodeID ¶
// NodeID is a composite identifier with the format "repoID::file::name".
// It is a named type rather than a plain string so intent is checked at compile time.
type NodeID string
NodeID is a composite identifier with the format: "repoID::file::name". Using a named type (not a plain string) enforces intent at compile time.
type NodeIndex ¶
// NodeIndex is a dense integer ID assigned sequentially to each parsed node.
// It stands in for the string-based NodeID inside the core engine.
type NodeIndex uint32
NodeIndex is a dense integer ID assigned sequentially to each parsed node. This replaces the string-based NodeID ("repoID::file::name") in the core engine.
type NodeType ¶
// NodeType classifies what kind of code entity a node represents.
type NodeType string
NodeType classifies what kind of code entity a node represents.
const ( NodeFile NodeType = "file" NodePackage NodeType = "package" NodeFunction NodeType = "function" NodeMethod NodeType = "method" NodeStruct NodeType = "struct" NodeInterface NodeType = "interface" NodeVariable NodeType = "variable" // NodeRoute is a virtual node injected by the heuristic pass (R1). // It represents an HTTP/RPC route registration (e.g. "GET /api/users"). // Not present in the AST — synthesised from framework registration patterns. NodeRoute NodeType = "route" // NodeSection is a documentation section extracted from a markdown file (R31). // Each ATX heading (# through ######) becomes a Section node with metadata: // title, depth (1-6), body_preview (first 200 chars), body (up to 2000 chars). NodeSection NodeType = "section" // NodeConcept is an abstract idea, algorithm, pattern, or methodology // extracted from documentation (e.g. "token bucket", "eventual consistency"). NodeConcept NodeType = "concept" // NodeEntity is a named real-world entity extracted from documentation // (e.g. a person, organization, product, or external system). NodeEntity NodeType = "entity" // NodeArtifact is a document, specification, standard, or law referenced // in documentation (e.g. "RFC 7519", "OpenAPI 3.0 spec", "GDPR Article 17"). NodeArtifact NodeType = "artifact" // NodeDecision is an architectural or design decision extracted from docs // (e.g. from ADR-style prose, decision logs, or architecture notes). NodeDecision NodeType = "decision" )
Node type constants: classify what kind of code entity a node represents.
type ProjectIdentity ¶
// ProjectIdentity is the compact architectural summary returned by get_project_identity.
type ProjectIdentity struct {
// RepoID identifies the repository being summarised (presumably the same
// repoID that prefixes NodeID values — TODO confirm).
RepoID string `json:"repo_id"`
// Summary is the GraphSummary for the repository.
Summary GraphSummary `json:"summary"`
EntryPoints []EntityRef `json:"entry_points"`
KeyEntities []EntityInfo `json:"key_entities"`
// SuggestedRules lists detected structural coupling patterns (see
// SuggestedRule); omitted from JSON when empty.
SuggestedRules []SuggestedRule `json:"suggested_rules,omitempty"`
// Scale is the repo size tier, computed from semantic node count.
Scale Scale `json:"scale"`
// ToolGuidance is a scale-aware recommendation for agents on which tools to prefer.
ToolGuidance string `json:"tool_guidance"`
}
ProjectIdentity is the compact architectural summary returned by get_project_identity.
type ProvenanceType ¶
// ProvenanceType classifies the trust tier of a graph node. It is derived at
// index time from file path patterns and content headers — no LLM needed.
type ProvenanceType string
ProvenanceType classifies the trust tier of a graph node. Derived at index time from file path patterns and content headers — no LLM needed.
const ( // ProvenanceUserAuthored is the default: files written by the user/team. ProvenanceUserAuthored ProvenanceType = "user-authored" // ProvenanceGenerated marks auto-generated files (protobuf, codegen, mocks). ProvenanceGenerated ProvenanceType = "generated" // ProvenanceVendored marks third-party dependency files (vendor/, node_modules/). ProvenanceVendored ProvenanceType = "vendored" // ProvenanceExternal marks content ingested from the web via scout sidecar. // // ARCHITECTURAL NOTE: This constant is defined and wired into the BFS weight // system (weight 0.2 — lowest tier) and the digest display layer, but it is // never set by any current code path. web_annotate() attaches web findings as // annotations on existing graph nodes — it does not create new NodeWebContent // nodes. A future implementation would create dedicated web-content nodes // tagged ProvenanceExternal when ingesting scout results. Until then this // constant is intentionally unused — do not remove it. ProvenanceExternal ProvenanceType = "external" )
type QualityNode ¶
QualityNode carries the graph identity and file context for a single node passed to CarveConfig.QualityScoreLookup. Name and File allow closures to convert to entityWithPath format without calling Graph.GetNode — which would attempt to re-acquire g.mu.RLock and potentially deadlock because CarveEgoGraph already holds the lock when it invokes QualityScoreLookup.
type Scale ¶
// Scale classifies a project's size based on semantic node count
// (functions + methods + structs + interfaces).
type Scale string
Scale classifies a project's size based on semantic node count (functions + methods + structs + interfaces). Used to give agents scale-aware guidance on when to prefer Synapses tools vs direct file access.
const ( // ScaleMicro represents projects <100 semantic nodes — Read/Grep often faster. ScaleMicro Scale = "micro" // ScaleSmall represents projects with 100–499 nodes — prefer Synapses for exploration. ScaleSmall Scale = "small" // ScaleMedium represents projects with 500–1999 nodes — strongly prefer Synapses tools. ScaleMedium Scale = "medium" // ScaleLarge represents projects with 2000+ nodes — always use Synapses tools. ScaleLarge Scale = "large" )
type StringPool ¶
// StringPool implements a bi-directional mapping between strings and uint32 IDs.
// It is documented as safe for concurrent use.
type StringPool struct {
// contains filtered or unexported fields
}
StringPool implements a bi-directional mapping between strings and uint32 IDs. It leverages the Go 1.23 `unique` package to ensure that identical strings share the same underlying memory allocation across the entire application, massively reducing heap usage in large repositories.
StringPool is safe for concurrent use.
func NewStringPool ¶
func NewStringPool() *StringPool
NewStringPool creates a new, empty string interning pool.
func (*StringPool) Intern ¶
func (p *StringPool) Intern(s string) StringID
Intern takes a raw string, deduplicates its memory using the `unique` package, and returns a compact StringID. If the string has already been interned, it returns the existing ID.
func (*StringPool) Value ¶
func (p *StringPool) Value(id StringID) string
Value looks up the string associated with the given StringID. It handles both properly interned strings and transient "Ghost" strings.
type SubGraph ¶
// SubGraph is the result of a context carve: a relevance-ranked slice of the graph.
type SubGraph struct {
// Root is the ID of the node the carve is anchored on (presumably the BFS
// start node — TODO confirm).
Root NodeID `json:"root"`
// Nodes holds the carved nodes that made it into the result.
Nodes []CarvedNode `json:"nodes"`
// Edges holds the edges included alongside Nodes.
Edges []*Edge `json:"edges"`
Truncated bool `json:"truncated,omitempty"` // true when token budget cut BFS results
TruncatedCount int `json:"truncated_count,omitempty"` // number of nodes dropped by budget
}
SubGraph is the result of a context carve: a relevance-ranked slice of the graph.
type SuggestedRule ¶
// SuggestedRule is a detected high-density structural coupling pattern.
// It is returned in get_project_identity to surface architectural conventions
// that the team may want to formalise as explicit forbidden-edge rules.
type SuggestedRule struct {
// ID is a stable slug derived from the directory pair.
ID string `json:"id"`
// Description is a human-readable summary including sample counts.
Description string `json:"description"`
// Confidence is the fraction of from-dir nodes that call into to-dir (0–1).
Confidence float64 `json:"confidence"`
// SampleCount is the number of distinct from-dir nodes that exhibit the pattern.
SampleCount int `json:"sample_count"`
// FromDirPattern is a glob suitable for use as from_file_pattern in a rule.
FromDirPattern string `json:"from_dir_pattern"`
// ToDirPattern is a glob suitable for use as to_file_pattern in a rule.
ToDirPattern string `json:"to_dir_pattern"`
// EdgeType is the type of coupling detected (always EdgeCalls for now).
EdgeType EdgeType `json:"edge_type"`
}
SuggestedRule is a detected high-density structural coupling pattern. Returned in get_project_identity to surface architectural conventions that the team may want to formalise as explicit forbidden-edge rules.
type TerraformRef ¶
// TerraformRef records an unresolved Terraform resource reference encountered
// during .tf file parsing. The resolver drains these after all files are parsed
// and creates DEPENDS_ON edges between resource nodes, which allows a resource
// in one .tf file to depend on a resource declared in another.
type TerraformRef struct {
FromID NodeID // node ID of the resource containing the reference
FromFile string // absolute path of the .tf file containing the reference
RefName string // target resource name: "type.name" or "data.type.name" or "module.name"
}
TerraformRef records an unresolved Terraform resource reference encountered during .tf file parsing. The resolver drains these after all files are parsed and creates DEPENDS_ON edges between resource nodes. This enables cross-file dependency resolution: a resource in vpc.tf can depend on one in compute.tf.
type TestRef ¶
// TestRef represents a test file with its distance from the changed entity.
type TestRef struct {
// File is the test file this reference points at.
File string `json:"file"`
Distance int `json:"distance"` // BFS hops from changed entity
Priority string `json:"priority"` // TestPriorityCritical, TestPriorityLikely, TestPriorityPeripheral
}
TestRef represents a test file with its distance from the changed entity.