Documentation
¶
Overview ¶
Package analyze provides the core analysis framework: registration and execution of static and history analyzers, aggregation of per-commit results into ticks, and serialization of reports in multiple output formats (text, JSON, YAML, binary, NDJSON, time-series, plot).
Index ¶
- Constants
- Variables
- func BuildCommitsByTick[V any](ticks []TICK, extract func(any) (map[string]V, bool)) map[int][]gitlib.Hash
- func EncodeUASTRecord(enc *gob.Encoder, changeIndex int, before, after *node.Node) error
- func GobDecode(raw []byte, dst any) error
- func HistoryKeysByID(leaves map[string]HistoryAnalyzer, ids []string) ([]string, error)
- func NormalizeFormat(format string) string
- func OutputHistoryResults(leaves []HistoryAnalyzer, results map[HistoryAnalyzer]Report, format string, ...) error
- func PrintHeader(writer io.Writer)
- func ReadRecordIfPresent[T any](reader ReportReader, kinds []string, kind string) (T, error)
- func ReadRecordsIfPresent[T any](reader ReportReader, kinds []string, kind string) ([]T, error)
- func RegisterPlotRenderer(fn PlotRenderer)
- func RegisterPlotSections(analyzerID string, fn SectionRendererFunc)
- func RegisterStorePlotSections(analyzerID string, fn StoreSectionRendererFunc)
- func ReportFunctionList(report Report, key string) ([]map[string]any, bool)
- func ReportFunctionListWithFallback(report Report, primaryKey, fallbackKey string) ([]map[string]any, bool)
- func ResolveFormats(format string, hasStatic, hasHistory bool) (staticFmt, historyFmt string, err error)
- func ResolveInputFormat(inputPath, inputFormat string) (string, error)
- func ShouldSkipFolderNode(path string, entry os.DirEntry, walkErr error, parser *uast.Parser) (bool, error)
- func StampSourceFile(reports map[string]Report, filePath string)
- func StreamUASTChanges(path string, changes gitlib.Changes) iter.Seq[uast.Change]
- func UniversalFormats() []string
- func ValidateFormat(format string, supported []string) (string, error)
- func ValidateUniversalFormat(format string) (string, error)
- func WriteConvertedOutput(model UnifiedModel, outputFormat string, writer io.Writer) error
- func WriteMergedTimeSeries(ts *MergedTimeSeries, writer io.Writer) error
- func WriteSliceKind[T any](w ReportWriter, kind string, records []T) error
- func WriteTimeSeriesNDJSON(ts *MergedTimeSeries, writer io.Writer) error
- type AggregationMode
- type AggregationModeAware
- type Aggregator
- type AggregatorFunc
- type AggregatorOptions
- type AggregatorSpillInfo
- type AnalysisVisitor
- type Analyzer
- type AnalyzerData
- type AnalyzerMode
- type AnalyzerResult
- type BaseHistoryAnalyzer
- func (b *BaseHistoryAnalyzer[M]) ApplySnapshot(_ PlumbingSnapshot)
- func (b *BaseHistoryAnalyzer[M]) AvgTCSize() int64
- func (b *BaseHistoryAnalyzer[M]) CPUHeavy() bool
- func (b *BaseHistoryAnalyzer[M]) Configure(_ map[string]any) error
- func (b *BaseHistoryAnalyzer[M]) Description() string
- func (b *BaseHistoryAnalyzer[M]) Descriptor() Descriptor
- func (b *BaseHistoryAnalyzer[M]) Flag() string
- func (b *BaseHistoryAnalyzer[M]) ListConfigurationOptions() []pipeline.ConfigurationOption
- func (b *BaseHistoryAnalyzer[M]) Name() string
- func (b *BaseHistoryAnalyzer[M]) NewAggregator(opts AggregatorOptions) Aggregator
- func (b *BaseHistoryAnalyzer[M]) ReleaseSnapshot(_ PlumbingSnapshot)
- func (b *BaseHistoryAnalyzer[M]) ReportFromTICKs(ctx context.Context, ticks []TICK) (Report, error)
- func (b *BaseHistoryAnalyzer[M]) SequentialOnly() bool
- func (b *BaseHistoryAnalyzer[M]) Serialize(result Report, format string, writer io.Writer) error
- func (b *BaseHistoryAnalyzer[M]) SerializeTICKs(ticks []TICK, format string, writer io.Writer) error
- func (b *BaseHistoryAnalyzer[M]) SnapshotPlumbing() PlumbingSnapshot
- func (b *BaseHistoryAnalyzer[M]) WorkingStateSize() int64
- type BaseReportSection
- func (b *BaseReportSection) AllIssues() []Issue
- func (b *BaseReportSection) Distribution() []DistributionItem
- func (b *BaseReportSection) KeyMetrics() []Metric
- func (b *BaseReportSection) Score() float64
- func (b *BaseReportSection) ScoreLabel() string
- func (b *BaseReportSection) SectionTitle() string
- func (b *BaseReportSection) StatusMessage() string
- func (b *BaseReportSection) TopIssues(_ int) []Issue
- type CommitIdentity
- type CommitLike
- type CommitMeta
- type CommitParents
- type CommitStatsDrainer
- type CommitTimeSeriesProvider
- type Context
- type Descriptor
- type DirectStoreWriter
- type DistributionItem
- type Factory
- type FileReportStore
- type GenericAggregator
- func (a *GenericAggregator[S, T]) Add(tc TC) error
- func (a *GenericAggregator[S, T]) Close() error
- func (a *GenericAggregator[S, T]) Collect() error
- func (a *GenericAggregator[S, T]) DiscardState()
- func (a *GenericAggregator[S, T]) DrainCommitStats() (stats map[string]any, tickHashes map[int][]gitlib.Hash)
- func (a *GenericAggregator[S, T]) EstimatedStateSize() int64
- func (a *GenericAggregator[S, T]) FlushAllTicks() ([]TICK, error)
- func (a *GenericAggregator[S, T]) FlushTick(tick int) (TICK, error)
- func (a *GenericAggregator[S, T]) RestoreSpillState(info AggregatorSpillInfo)
- func (a *GenericAggregator[S, T]) Spill() (int64, error)
- func (a *GenericAggregator[S, T]) SpillState() AggregatorSpillInfo
- type HistoryAnalyzer
- type Issue
- type ItemConverter
- type MergeTracker
- type MergedCommitData
- type MergedTimeSeries
- type Metric
- type MetricComputer
- type MultiAnalyzerTraverser
- type NDJSONLine
- type NodeVisitor
- type Parallelizable
- type PlotRenderer
- type PlumbingSnapshot
- type PreparationConfig
- type PreparedCommit
- type Registry
- func (r *Registry) All() []Descriptor
- func (r *Registry) Descriptor(id string) (Descriptor, bool)
- func (r *Registry) ExpandPatterns(patterns []string) ([]string, error)
- func (r *Registry) IDsByMode(mode AnalyzerMode) []string
- func (r *Registry) SelectedIDs(patterns []string) ([]string, error)
- func (r *Registry) Split(ids []string) (staticIDs, historyIDs []string, err error)
- type Report
- type ReportMeta
- type ReportReader
- type ReportSection
- type ReportSectionProvider
- type ReportStore
- type ReportWriter
- type ResultAggregator
- type SectionRendererFunc
- type SpillThresholdSetter
- type SpilledUASTRecord
- type StateSizer
- type StaticAnalyzer
- type StaticProgressEvent
- type StaticProgressFunc
- type StaticRenderer
- type StaticService
- func (svc *StaticService) AnalyzeFolder(ctx context.Context, rootPath string, analyzerList []string) (map[string]Report, error)
- func (svc *StaticService) AnalyzerNamesByID(ids []string) ([]string, error)
- func (svc *StaticService) BuildSections(results map[string]Report) []ReportSection
- func (svc *StaticService) FindAnalyzer(name string) StaticAnalyzer
- func (svc *StaticService) FormatCompact(results map[string]Report, noColor bool, writer io.Writer) error
- func (svc *StaticService) FormatJSON(results map[string]Report, writer io.Writer) error
- func (svc *StaticService) FormatPerAnalyzer(analyzerNames []string, results map[string]Report, format string, ...) error
- func (svc *StaticService) FormatPlotPages(analyzerNames []string, results map[string]Report, outputDir string) error
- func (svc *StaticService) FormatText(results map[string]Report, verbose, noColor bool, writer io.Writer) error
- func (svc *StaticService) RenderPlotPages(analyzerNames []string, results map[string]Report, outputDir string) ([]plotpage.PageMeta, error)
- func (svc *StaticService) ResolveMallocTrimInterval() int
- func (svc *StaticService) ResolveMaxWorkers() int
- func (svc *StaticService) RunAndFormat(ctx context.Context, path string, analyzerIDs []string, format string, ...) error
- type StoreSectionRendererFunc
- type StoreWriter
- type StreamingSink
- type TC
- type TCSink
- type TICK
- type Thresholds
- type TimeSeriesChunkFlusher
- type TypedCollection
- type UnifiedModel
- func DecodeBinaryInputModel(input []byte) (UnifiedModel, error)
- func DecodeCombinedBinaryReports(input []byte, ids []string, modes []AnalyzerMode) (UnifiedModel, error)
- func DecodeInputModel(input []byte, inputFormat string) (UnifiedModel, error)
- func ParseUnifiedModelJSON(data []byte) (UnifiedModel, error)
- type VisitorProvider
Constants ¶
const (
	// FormatBinAlias is a short CLI alias for binary output.
	FormatBinAlias = "bin"
	// FormatText is the human-readable output format for CLI display.
	FormatText = "text"
	// FormatCompact is the single-line-per-analyzer static analysis output format.
	FormatCompact = "compact"
	// FormatTimeSeries is the unified time-series output format that merges
	// all history analyzer data into a single JSON array keyed by commit.
	FormatTimeSeries = "timeseries"
	// FormatNDJSON is the streaming output format that writes one JSON line
	// per TC as commits are processed. No aggregator, no buffering.
	FormatNDJSON = "ndjson"
	// FormatTimeSeriesNDJSON is the merged timeseries format written as
	// one JSON line per commit (NDJSON). Combines --format timeseries with --ndjson.
	FormatTimeSeriesNDJSON = "timeseries+ndjson"
)
const (
	FormatYAML   = "yaml"
	FormatJSON   = "json"
	FormatBinary = "binary"
	FormatPlot   = "plot"
)
Serialization format constants.
const (
	SeverityGood = "good"
	SeverityFair = "fair"
	SeverityPoor = "poor"
	SeverityInfo = "info"
)
Severity constants for Issue classification.
const ConfigTmpDir = "TmpDir"
ConfigTmpDir is the facts key for the global temporary directory override. When set, analyzers should use this directory for spill and hibernation files instead of os.TempDir.
const DefaultMallocTrimInterval = 50
DefaultMallocTrimInterval is the number of files between malloc_trim calls. Releases glibc arenas back to the OS to prevent native memory accumulation.
const DefaultProgressInterval = 1000
DefaultProgressInterval is the number of files between progress callback invocations.
const DefaultStaticMaxWorkers = 8
DefaultStaticMaxWorkers is the maximum number of concurrent file analysis workers when no explicit override is provided. Caps memory from concurrent UAST parse trees.
const InputFormatAuto = "auto"
InputFormatAuto is the default input format that triggers extension-based detection.
const ProgressPhaseComplete = "complete"
ProgressPhaseComplete indicates analysis has finished.
const ProgressPhaseProcessing = "processing"
ProgressPhaseProcessing indicates files are being analyzed.
const ReportKeyCommitMeta = "commit_meta"
ReportKeyCommitMeta is the Report key that carries per-commit metadata (timestamp, author) for timeseries output enrichment.
const ScoreInfoOnly = -1.0
ScoreInfoOnly indicates a section has no score (info only).
const ScoreLabelInfo = "Info"
ScoreLabelInfo is the label shown for info-only sections.
const SourceFileKey = "_source_file"
SourceFileKey is the report key used to stamp the originating source file.
const TimeSeriesModelVersion = "codefang.timeseries.v1"
TimeSeriesModelVersion is the schema version for unified time-series output.
const UnifiedModelVersion = "codefang.run.v1"
UnifiedModelVersion is the schema version for converted run outputs.
Variables ¶
var (
	// ErrInvalidMixedFormat indicates a format that cannot be used in combined static+history runs.
	ErrInvalidMixedFormat = errors.New("invalid mixed format")
	// ErrInvalidStaticFormat indicates an invalid static analysis output format.
	ErrInvalidStaticFormat = errors.New("invalid static format")
	// ErrInvalidHistoryFormat indicates an invalid history analysis output format.
	ErrInvalidHistoryFormat = errors.New("invalid history format")
	// ErrInvalidInputFormat indicates an unrecognized input format.
	ErrInvalidInputFormat = errors.New("invalid input format")
	// ErrBinaryEnvelopeCount indicates an unexpected number of binary envelopes.
	ErrBinaryEnvelopeCount = errors.New("unexpected binary envelope count")
)
var ErrAnalysisFailed = errors.New("analysis failed")
ErrAnalysisFailed indicates that one or more analyzers failed during parallel execution.
var ErrAnalyzerNotFound = errors.New("analyzer not found in store")
ErrAnalyzerNotFound is returned when opening a non-existent analyzer.
var ErrDuplicateAnalyzerID = errors.New("duplicate analyzer id")
ErrDuplicateAnalyzerID is returned when registry receives duplicate IDs.
var ErrInvalidAnalyzerGlob = errors.New("invalid analyzer glob")
ErrInvalidAnalyzerGlob is returned when a glob pattern is malformed.
var ErrInvalidAnalyzerMode = errors.New("invalid analyzer mode")
ErrInvalidAnalyzerMode is returned when analyzer mode mismatches runtime category.
var ErrInvalidUnifiedModel = errors.New("invalid unified model")
ErrInvalidUnifiedModel indicates malformed canonical conversion data.
var ErrMissingComputeMetrics = errors.New("missing ComputeMetricsFn hook")
ErrMissingComputeMetrics is returned when Serialize is called but ComputeMetricsFn is nil.
var ErrNilRootNode = errors.New("root node is nil")
ErrNilRootNode indicates that a nil root node was passed to an analyzer.
var ErrNotImplemented = errors.New("not implemented")
ErrNotImplemented is returned by stub methods that are not yet implemented.
var ErrRendererNotSet = errors.New("static service renderer not set")
ErrRendererNotSet is returned when a formatting method is called without a Renderer.
var ErrTornWrite = errors.New("torn write detected: writer was not closed")
ErrTornWrite is returned when a write was not finalized.
var ErrUnknownAnalyzerID = errors.New("unknown analyzer id")
ErrUnknownAnalyzerID is returned when registry lookup fails.
var ErrUnregisteredAnalyzer = errors.New("no registered analyzer with name")
ErrUnregisteredAnalyzer indicates that no analyzer with the given name is registered.
var (
	// ErrUnsupportedFormat indicates the requested output format is not supported.
	ErrUnsupportedFormat = errors.New("unsupported format")
)
var ErrWriterClosed = errors.New("report writer: write after close")
ErrWriterClosed is returned when writing to a closed writer.
Functions ¶
func BuildCommitsByTick ¶
func BuildCommitsByTick[V any](ticks []TICK, extract func(any) (map[string]V, bool)) map[int][]gitlib.Hash
BuildCommitsByTick converts ticks into a map from tick index to commit hashes. The extract callback should type-assert tick.Data and return the commit-keyed map from the analyzer's TickData type, or nil/false if the data is not the expected type or has no commits.
func EncodeUASTRecord ¶
EncodeUASTRecord writes a single UAST change record to the gob encoder.
func HistoryKeysByID ¶
func HistoryKeysByID(leaves map[string]HistoryAnalyzer, ids []string) ([]string, error)
HistoryKeysByID maps history analyzer IDs to their pipeline keys.
func NormalizeFormat ¶
NormalizeFormat canonicalizes a user-provided output format string.
func OutputHistoryResults ¶
func OutputHistoryResults(
	leaves []HistoryAnalyzer,
	results map[HistoryAnalyzer]Report,
	format string,
	writer io.Writer,
) error
OutputHistoryResults outputs the results for all selected history leaves.
func PrintHeader ¶
PrintHeader prints the codefang version header.
func ReadRecordIfPresent ¶
func ReadRecordIfPresent[T any](reader ReportReader, kinds []string, kind string) (T, error)
ReadRecordIfPresent reads a single record of the given kind from reader. If multiple records exist, the last one wins. Returns (zero, nil) if kind is not present in kinds.
func ReadRecordsIfPresent ¶
func ReadRecordsIfPresent[T any](reader ReportReader, kinds []string, kind string) ([]T, error)
ReadRecordsIfPresent reads all records of the given kind from reader, gob-decoding each into T and returning the collected slice. Returns (nil, nil) if kind is not present in kinds.
func RegisterPlotRenderer ¶
func RegisterPlotRenderer(fn PlotRenderer)
RegisterPlotRenderer sets the package-level plot renderer used by WriteConvertedOutput. It is intended to be called from the renderer package's init function.
func RegisterPlotSections ¶
func RegisterPlotSections(analyzerID string, fn SectionRendererFunc)
RegisterPlotSections registers a plot section renderer for the given analyzer ID.
func RegisterStorePlotSections ¶
func RegisterStorePlotSections(analyzerID string, fn StoreSectionRendererFunc)
RegisterStorePlotSections registers a store-aware plot section renderer for the given analyzer ID.
func ReportFunctionList ¶
ReportFunctionList extracts a []map[string]any from a report key. Handles direct typed values, TypedCollection wrappers, and JSON-decoded []any slices.
func ReportFunctionListWithFallback ¶
func ReportFunctionListWithFallback(report Report, primaryKey, fallbackKey string) ([]map[string]any, bool)
ReportFunctionListWithFallback extracts a function list from a report, trying the primary key first, then the fallback key.
func ResolveFormats ¶
func ResolveFormats(format string, hasStatic, hasHistory bool) (staticFmt, historyFmt string, err error)
ResolveFormats determines the output formats for static and history phases based on the user-provided format string and whether each phase is active.
func ResolveInputFormat ¶
ResolveInputFormat determines the input format from the provided path and explicit format hint. When the format is empty or InputFormatAuto, the extension of inputPath is used to detect the format.
func ShouldSkipFolderNode ¶
func ShouldSkipFolderNode(path string, entry os.DirEntry, walkErr error, parser *uast.Parser) (bool, error)
ShouldSkipFolderNode decides whether a folder walk entry should be skipped.
func StampSourceFile ¶
StampSourceFile adds "_source_file" metadata to every collection item in each report. This allows downstream consumers (e.g., plot generators) to group results by file/package. Handles both legacy []map[string]any collections and TypedCollection wrappers.
func StreamUASTChanges ¶
StreamUASTChanges deserializes a spill file and yields changes one by one. Each record's ChangeIndex is used to reconstruct the *gitlib.Change pointer from the provided changes slice.
func UniversalFormats ¶
func UniversalFormats() []string
UniversalFormats returns the canonical output formats supported by all analyzers.
func ValidateFormat ¶
ValidateFormat checks whether a format is in the provided support list.
func ValidateUniversalFormat ¶
ValidateUniversalFormat checks whether a format belongs to the universal contract.
func WriteConvertedOutput ¶
func WriteConvertedOutput(model UnifiedModel, outputFormat string, writer io.Writer) error
WriteConvertedOutput encodes the unified model into the requested output format and writes it to the provided writer.
func WriteMergedTimeSeries ¶
func WriteMergedTimeSeries(ts *MergedTimeSeries, writer io.Writer) error
WriteMergedTimeSeries encodes a MergedTimeSeries as indented JSON to the writer.
func WriteSliceKind ¶
func WriteSliceKind[T any](w ReportWriter, kind string, records []T) error
WriteSliceKind writes each element of a typed slice as a separate record under the given kind. Returns nil for empty or nil slices.
func WriteTimeSeriesNDJSON ¶
func WriteTimeSeriesNDJSON(ts *MergedTimeSeries, writer io.Writer) error
WriteTimeSeriesNDJSON writes a MergedTimeSeries as NDJSON — one JSON line per commit.
Types ¶
type AggregationMode ¶
type AggregationMode int
AggregationMode controls whether per-item data is collected during aggregation.
const (
	// AggregationModeFull collects all per-item data (default, zero value).
	AggregationModeFull AggregationMode = iota
	// AggregationModeSummaryOnly skips per-item data collection.
	// MetricsProcessor continues normally; SpillableDataCollector and DetailedDataCollector become no-ops.
	AggregationModeSummaryOnly
)
func ResolveAggregationMode ¶
func ResolveAggregationMode(format string) AggregationMode
ResolveAggregationMode returns the aggregation mode for a given output format. Text and compact formats only show summary metrics, so per-item data is skipped.
type AggregationModeAware ¶
type AggregationModeAware interface {
SetAggregationMode(mode AggregationMode)
}
AggregationModeAware is implemented by aggregators that support mode switching.
type Aggregator ¶
type Aggregator interface {
// Add ingests a single per-commit result.
Add(tc TC) error
// FlushTick finalizes and returns the aggregated result for the
// given tick. Precondition: tick >= 0.
FlushTick(tick int) (TICK, error)
// FlushAllTicks returns TICKs for all ticks that have accumulated data.
// For per-tick aggregators (byTick map), ticks are sorted ascending.
// For cumulative aggregators, returns a single TICK with all state.
// Returns nil, nil when the aggregator has no data.
FlushAllTicks() ([]TICK, error)
// Spill writes accumulated state to disk to free memory.
// Returns the number of bytes freed. A SpillBudget of 0 in
// AggregatorOptions means no spill limit (keep everything in memory).
Spill() (int64, error)
// Collect reloads previously spilled state back into memory.
Collect() error
// EstimatedStateSize returns the current in-memory footprint
// of the aggregator's accumulated state in bytes.
EstimatedStateSize() int64
// SpillState returns the current on-disk spill state for checkpoint persistence.
SpillState() AggregatorSpillInfo
// RestoreSpillState points the aggregator at a previously-saved spill directory.
// Called on checkpoint resume before any Add() calls.
RestoreSpillState(info AggregatorSpillInfo)
// Close releases all resources. Idempotent.
Close() error
}
Aggregator transforms a stream of TCs into TICKs. It is driven by a single goroutine; callers must serialize calls.
Lifecycle: Add() repeatedly → FlushTick() per tick boundary → Spill()/Collect() as needed → Close().
Close is idempotent and safe to call after errors.
type AggregatorFunc ¶
type AggregatorFunc func(opts AggregatorOptions) (Aggregator, error)
AggregatorFunc is the factory signature for creating an Aggregator from options. Concrete analyzers provide this via their registration.
type AggregatorOptions ¶
type AggregatorOptions struct {
// SpillBudget is the maximum bytes of aggregator state to keep
// in memory before spilling to disk. Zero means no limit.
SpillBudget int64
// SpillDir is the directory for spill files. Empty means the
// system default temporary directory.
SpillDir string
// Sampling is the commit sampling rate. Zero means no sampling
// (process every commit).
Sampling int
// Granularity is the tick granularity in hours. Zero means the
// pipeline default.
Granularity int
}
AggregatorOptions configures an Aggregator instance. A zero-value AggregatorOptions is valid and means: no spill limit, no spill directory, no sampling, default granularity.
type AggregatorSpillInfo ¶
type AggregatorSpillInfo struct {
// Dir is the directory containing spill files. Empty if no spills occurred.
Dir string `json:"dir,omitempty"`
// Count is the number of spill files written.
Count int `json:"count,omitempty"`
}
AggregatorSpillInfo describes the on-disk spill state of an Aggregator. Used by the checkpoint system to save and restore spill directories.
type AnalysisVisitor ¶
type AnalysisVisitor interface {
NodeVisitor
GetReport() Report
}
AnalysisVisitor extends NodeVisitor to provide analysis results.
type Analyzer ¶
type Analyzer interface {
Name() string
Flag() string
Descriptor() Descriptor
// Configuration.
ListConfigurationOptions() []pipeline.ConfigurationOption
Configure(facts map[string]any) error
}
Analyzer is the common base interface for all analyzers.
type AnalyzerData ¶
AnalyzerData pairs an analyzer flag with its per-commit extracted data.
type AnalyzerMode ¶
type AnalyzerMode string
AnalyzerMode identifies analyzer runtime mode.
const (
	ModeStatic  AnalyzerMode = "static"
	ModeHistory AnalyzerMode = "history"
)
Analyzer modes.
type AnalyzerResult ¶
type AnalyzerResult struct {
ID string `json:"id" yaml:"id"`
Mode AnalyzerMode `json:"mode" yaml:"mode"`
Report Report `json:"report" yaml:"report"`
}
AnalyzerResult represents one analyzer report in canonical converted output.
type BaseHistoryAnalyzer ¶
type BaseHistoryAnalyzer[M any] struct {
	Desc               Descriptor
	Sequential         bool
	CPUHeavyFlag       bool
	EstimatedStateSize int64
	EstimatedTCSize    int64
	ConfigOptions      []pipeline.ConfigurationOption

	// Hooks.
	ComputeMetricsFn MetricComputer[M]
	TicksToReportFn  func(ctx context.Context, ticks []TICK) Report
	AggregatorFn     func(opts AggregatorOptions) Aggregator

	// Custom format hooks — set these to handle FormatText and FormatPlot
	// without overriding Serialize/SerializeTICKs. When set, the base
	// dispatch checks them before falling through to JSON/YAML/Binary.
	SerializeTextFn func(result Report, writer io.Writer) error
	SerializePlotFn func(result Report, writer io.Writer) error
}
BaseHistoryAnalyzer provides a complete default implementation for HistoryAnalyzer and Parallelizable. It is intended to be embedded by concrete analyzers to reduce boilerplate.
func (*BaseHistoryAnalyzer[M]) ApplySnapshot ¶
func (b *BaseHistoryAnalyzer[M]) ApplySnapshot(_ PlumbingSnapshot)
ApplySnapshot provides a default no-op implementation.
func (*BaseHistoryAnalyzer[M]) AvgTCSize ¶
func (b *BaseHistoryAnalyzer[M]) AvgTCSize() int64
AvgTCSize returns the estimated bytes of TC payload emitted per commit.
func (*BaseHistoryAnalyzer[M]) CPUHeavy ¶
func (b *BaseHistoryAnalyzer[M]) CPUHeavy() bool
CPUHeavy returns true if this analyzer's Consume() is CPU-intensive.
func (*BaseHistoryAnalyzer[M]) Configure ¶
func (b *BaseHistoryAnalyzer[M]) Configure(_ map[string]any) error
Configure is a default implementation that does nothing.
func (*BaseHistoryAnalyzer[M]) Description ¶
func (b *BaseHistoryAnalyzer[M]) Description() string
Description returns the analyzer description from the descriptor.
func (*BaseHistoryAnalyzer[M]) Descriptor ¶
func (b *BaseHistoryAnalyzer[M]) Descriptor() Descriptor
Descriptor returns stable analyzer metadata.
func (*BaseHistoryAnalyzer[M]) Flag ¶
func (b *BaseHistoryAnalyzer[M]) Flag() string
Flag returns the CLI flag for the analyzer, typically the part after "history/".
func (*BaseHistoryAnalyzer[M]) ListConfigurationOptions ¶
func (b *BaseHistoryAnalyzer[M]) ListConfigurationOptions() []pipeline.ConfigurationOption
ListConfigurationOptions returns the configurable options for this analyzer.
func (*BaseHistoryAnalyzer[M]) Name ¶
func (b *BaseHistoryAnalyzer[M]) Name() string
Name returns the analyzer name (derived from the descriptor ID).
func (*BaseHistoryAnalyzer[M]) NewAggregator ¶
func (b *BaseHistoryAnalyzer[M]) NewAggregator(opts AggregatorOptions) Aggregator
NewAggregator creates an aggregator using the configured AggregatorFn hook. Returns nil when no AggregatorFn is set (e.g., plumbing analyzers). Concrete analyzers that need runtime state at aggregator creation time (e.g., couples, burndown) should override this method.
func (*BaseHistoryAnalyzer[M]) ReleaseSnapshot ¶
func (b *BaseHistoryAnalyzer[M]) ReleaseSnapshot(_ PlumbingSnapshot)
ReleaseSnapshot provides a default no-op implementation.
func (*BaseHistoryAnalyzer[M]) ReportFromTICKs ¶
ReportFromTICKs converts aggregated TICKs into a Report.
func (*BaseHistoryAnalyzer[M]) SequentialOnly ¶
func (b *BaseHistoryAnalyzer[M]) SequentialOnly() bool
SequentialOnly returns true if this analyzer cannot be parallelized.
func (*BaseHistoryAnalyzer[M]) Serialize ¶
Serialize dynamically uses ComputeMetricsFn and standard encodings. Custom format hooks (SerializeTextFn, SerializePlotFn) are checked first, allowing analyzers to support text/plot output without overriding this method.
func (*BaseHistoryAnalyzer[M]) SerializeTICKs ¶
func (b *BaseHistoryAnalyzer[M]) SerializeTICKs(ticks []TICK, format string, writer io.Writer) error
SerializeTICKs uses TicksToReportFn and delegates to Serialize.
func (*BaseHistoryAnalyzer[M]) SnapshotPlumbing ¶
func (b *BaseHistoryAnalyzer[M]) SnapshotPlumbing() PlumbingSnapshot
SnapshotPlumbing provides a default no-op implementation.
func (*BaseHistoryAnalyzer[M]) WorkingStateSize ¶
func (b *BaseHistoryAnalyzer[M]) WorkingStateSize() int64
WorkingStateSize returns the estimated bytes of analyzer-internal working state.
type BaseReportSection ¶
BaseReportSection provides default implementations for ReportSection. Analyzers can embed this and override specific methods.
func (*BaseReportSection) AllIssues ¶
func (b *BaseReportSection) AllIssues() []Issue
AllIssues returns nil by default. Override to provide all issues.
func (*BaseReportSection) Distribution ¶
func (b *BaseReportSection) Distribution() []DistributionItem
Distribution returns nil by default. Override to provide distribution data.
func (*BaseReportSection) KeyMetrics ¶
func (b *BaseReportSection) KeyMetrics() []Metric
KeyMetrics returns nil by default. Override to provide metrics.
func (*BaseReportSection) Score ¶
func (b *BaseReportSection) Score() float64
Score returns the score value.
func (*BaseReportSection) ScoreLabel ¶
func (b *BaseReportSection) ScoreLabel() string
ScoreLabel returns formatted score or "Info" for info-only sections.
func (*BaseReportSection) SectionTitle ¶
func (b *BaseReportSection) SectionTitle() string
SectionTitle returns the display title.
func (*BaseReportSection) StatusMessage ¶
func (b *BaseReportSection) StatusMessage() string
StatusMessage returns the summary message.
func (*BaseReportSection) TopIssues ¶
func (b *BaseReportSection) TopIssues(_ int) []Issue
TopIssues returns nil by default. Override to provide top issues.
type CommitIdentity ¶
type CommitIdentity interface {
Hash() gitlib.Hash
Author() gitlib.Signature
Committer() gitlib.Signature
Message() string
}
CommitIdentity provides commit identification methods.
type CommitLike ¶
type CommitLike interface {
CommitIdentity
CommitParents
Tree() (*gitlib.Tree, error)
Files() (*gitlib.FileIter, error)
File(path string) (*gitlib.File, error)
}
CommitLike is an interface for commit-like objects (real or mock). It composes CommitIdentity and CommitParents with tree/file access.
type CommitMeta ¶
type CommitMeta struct {
Hash string `json:"hash"`
Timestamp string `json:"timestamp"`
Author string `json:"author"`
Tick int `json:"tick"`
}
CommitMeta carries per-commit metadata for time-series construction. Analyzers populate this during Consume() from the analyze.Context.
type CommitParents ¶
CommitParents provides access to parent commits.
type CommitStatsDrainer ¶
type CommitStatsDrainer interface {
// DrainCommitStats returns per-commit summary data and per-tick commit
// ordering, then clears these maps from the aggregator. Cumulative state
// (coupling matrices, burndown histories, etc.) remains intact.
//
// The returned commitData maps commit hash (hex) to a JSON-serializable
// summary (same shape as ExtractCommitTimeSeries output for this analyzer).
DrainCommitStats() (commitData map[string]any, commitsByTick map[int][]gitlib.Hash)
}
CommitStatsDrainer allows extracting and clearing per-commit data between chunks during streaming timeseries NDJSON output. Aggregators that store per-commit summary data implement this to enable per-chunk flushing and prevent OOM on large repos.
type CommitTimeSeriesProvider ¶
type CommitTimeSeriesProvider interface {
// ExtractCommitTimeSeries extracts per-commit data from a finalized report.
// Returns a map of commit hash (hex string) to a JSON-serializable value.
// Returns nil if no per-commit data is available.
ExtractCommitTimeSeries(report Report) map[string]any
}
CommitTimeSeriesProvider is implemented by analyzers that contribute per-commit data to the unified time-series output (--format timeseries). Replaces the global TickExtractor registry with compile-time interface dispatch.
type Context ¶
type Context struct {
Time time.Time
Commit CommitLike
Index int
IsMerge bool
// Changes contains the tree diff changes for this commit.
Changes gitlib.Changes
// BlobCache maps blob hashes to cached blobs.
// Populated by the runtime pipeline for efficient blob access.
BlobCache map[gitlib.Hash]*gitlib.CachedBlob
// FileDiffs maps file paths to diff data for modified files.
// Populated by the runtime pipeline using native C diff computation.
FileDiffs map[string]plumbing.FileDiffData
// UASTChanges contains pre-computed UAST changes for this commit.
// Populated by the UAST pipeline stage when enabled.
UASTChanges []uast.Change
// UASTSpillPath is the path to a spilled UAST gob file for large commits.
// When set, UASTChanges is nil and the changes must be deserialized from disk.
UASTSpillPath string
}
Context provides information about the current step in the analysis.
type Descriptor ¶
type Descriptor struct {
ID string
Description string
Mode AnalyzerMode
}
Descriptor contains stable analyzer metadata.
func NewDescriptor ¶
func NewDescriptor(mode AnalyzerMode, name, description string) Descriptor
NewDescriptor builds stable analyzer metadata from analyzer name and mode.
type DirectStoreWriter ¶
type DirectStoreWriter interface {
WriteToStoreFromAggregator(ctx context.Context, agg Aggregator, w ReportWriter) error
}
DirectStoreWriter is optionally implemented by HistoryAnalyzers that can write directly from their aggregator state to a ReportWriter. Unlike StoreWriter, this interface receives the aggregator after Collect() without FlushAllTicks, avoiding the deep copy overhead for large state.
type DistributionItem ¶
type DistributionItem struct {
Label string // Category label (e.g., "Simple (1-5)").
Percent float64 // Percentage as 0-1.
Count int // Absolute count.
}
DistributionItem represents a category in a distribution chart.
type Factory ¶
type Factory struct {
// contains filtered or unexported fields
}
Factory manages registration and execution of static analyzers.
func NewFactory ¶
func NewFactory(analyzers []StaticAnalyzer) *Factory
NewFactory creates a new factory instance.
func (*Factory) RegisterAnalyzer ¶
func (f *Factory) RegisterAnalyzer(analyzer StaticAnalyzer)
RegisterAnalyzer adds an analyzer to the registry.
func (*Factory) RunAnalyzer ¶
RunAnalyzer executes the specified analyzer.
type FileReportStore ¶
type FileReportStore struct {
// contains filtered or unexported fields
}
FileReportStore is a file-backed ReportStore using gob encoding. Directory layout: manifest.json + per-analyzer subdirectories with meta.json and <kind>.gob files.
func NewFileReportStore ¶
func NewFileReportStore(dir string) *FileReportStore
NewFileReportStore creates a file-backed ReportStore rooted at dir. If the directory already contains a manifest, it is loaded so that AnalyzerIDs returns the stored list without requiring new writes.
func (*FileReportStore) AnalyzerIDs ¶
func (s *FileReportStore) AnalyzerIDs() []string
AnalyzerIDs returns the ordered list of analyzer IDs that have been written.
func (*FileReportStore) Begin ¶
func (s *FileReportStore) Begin(analyzerID string, meta ReportMeta) (ReportWriter, error)
Begin starts writing records for the given analyzer.
func (*FileReportStore) Close ¶
func (s *FileReportStore) Close() error
Close releases store-level resources.
func (*FileReportStore) Open ¶
func (s *FileReportStore) Open(analyzerID string) (ReportReader, error)
Open returns a reader for the given analyzer's stored records.
type GenericAggregator ¶
type GenericAggregator[S any, T any] struct {
	Opts       AggregatorOptions
	ByTick     map[int]S
	SpillStore *spillstore.SpillStore[S]

	// Delegate Hooks.
	ExtractTCFn  func(TC, map[int]S) error
	MergeStateFn func(S, S) S
	SizeStateFn  func(S) int64
	BuildTickFn  func(int, S) (TICK, error)
	// DrainCommitDataFn extracts and clears per-commit data from a tick
	// accumulator state. Returns summarized per-commit data and commit ordering.
	// When nil, DrainCommitStats returns nil (CommitStatsDrainer not satisfied).
	DrainCommitDataFn func(S) (map[string]any, map[int][]gitlib.Hash)
}
GenericAggregator manages per-tick state accumulation, spilling, and collection. S is the tick accumulator state (e.g., *TickAccumulator). T is the final tick data representation (e.g., *TickData).
func NewGenericAggregator ¶
func NewGenericAggregator[S any, T any]( opts AggregatorOptions, extractFn func(TC, map[int]S) error, mergeFn func(S, S) S, sizeFn func(S) int64, buildFn func(int, S) (TICK, error), ) *GenericAggregator[S, T]
NewGenericAggregator is a helper to create and initialize a GenericAggregator.
func (*GenericAggregator[S, T]) Add ¶
func (a *GenericAggregator[S, T]) Add(tc TC) error
Add ingests a single per-commit result. If the internal state size exceeds SpillBudget, it triggers a Spill.
func (*GenericAggregator[S, T]) Close ¶
func (a *GenericAggregator[S, T]) Close() error
Close releases all resources. Idempotent.
func (*GenericAggregator[S, T]) Collect ¶
func (a *GenericAggregator[S, T]) Collect() error
Collect reloads previously spilled state back into memory.
func (*GenericAggregator[S, T]) DiscardState ¶
func (a *GenericAggregator[S, T]) DiscardState()
DiscardState clears all in-memory cumulative state without serialization.
func (*GenericAggregator[S, T]) DrainCommitStats ¶
func (a *GenericAggregator[S, T]) DrainCommitStats() (stats map[string]any, tickHashes map[int][]gitlib.Hash)
DrainCommitStats implements CommitStatsDrainer when DrainCommitDataFn is set. Iterates all tick accumulators, extracts per-commit data, and merges the results.
func (*GenericAggregator[S, T]) EstimatedStateSize ¶
func (a *GenericAggregator[S, T]) EstimatedStateSize() int64
EstimatedStateSize returns the current in-memory footprint of the accumulated state.
func (*GenericAggregator[S, T]) FlushAllTicks ¶
func (a *GenericAggregator[S, T]) FlushAllTicks() ([]TICK, error)
FlushAllTicks returns TICKs for all ticks that have accumulated data, sorted ascending.
func (*GenericAggregator[S, T]) FlushTick ¶
func (a *GenericAggregator[S, T]) FlushTick(tick int) (TICK, error)
FlushTick finalizes and returns the aggregated result for the given tick.
func (*GenericAggregator[S, T]) RestoreSpillState ¶
func (a *GenericAggregator[S, T]) RestoreSpillState(info AggregatorSpillInfo)
RestoreSpillState points the aggregator at a previously-saved spill directory.
func (*GenericAggregator[S, T]) Spill ¶
func (a *GenericAggregator[S, T]) Spill() (int64, error)
Spill writes accumulated state to disk to free memory.
func (*GenericAggregator[S, T]) SpillState ¶
func (a *GenericAggregator[S, T]) SpillState() AggregatorSpillInfo
SpillState returns the current on-disk spill state for checkpoint persistence.
type HistoryAnalyzer ¶
type HistoryAnalyzer interface {
Analyzer
// Core analysis methods.
Initialize(repository *gitlib.Repository) error
// Consumption. Returns a TC with per-commit result data.
// Plumbing analyzers return zero-value TC (Data: nil).
Consume(ctx context.Context, ac *Context) (TC, error)
// Memory sizing for the planner.
// WorkingStateSize returns the estimated bytes of analyzer-internal
// working state accumulated per commit (maps, treaps, matrices).
WorkingStateSize() int64
// AvgTCSize returns the estimated bytes of TC payload emitted per commit.
AvgTCSize() int64
// Aggregation. NewAggregator creates a per-analyzer aggregator that
// collects TCs into TICKs. Returns nil when no aggregator is available.
NewAggregator(opts AggregatorOptions) Aggregator
// SerializeTICKs writes aggregated TICKs in the given format.
// Returns ErrNotImplemented when not yet wired.
SerializeTICKs(ticks []TICK, format string, writer io.Writer) error
// ReportFromTICKs converts aggregated TICKs into a Report.
// Returns ErrNotImplemented for analyzers without aggregators.
// ctx is used for cancellation and tracing (e.g. tree.Files() I/O).
ReportFromTICKs(ctx context.Context, ticks []TICK) (Report, error)
// Branching support.
Fork(n int) []HistoryAnalyzer
Merge(branches []HistoryAnalyzer)
// Formatting/Serialization.
// Format can be: "yaml", "json", or "binary" (protobuf).
Serialize(result Report, format string, writer io.Writer) error
}
HistoryAnalyzer interface defines the contract for history-based analyzers.
type Issue ¶
type Issue struct {
Name string // Item name (e.g., function name).
Location string // File location (e.g., "pkg/foo/bar.go:42").
Value string // Metric value (e.g., "12").
Severity string // "good", "fair", "poor", or "info".
}
Issue represents a problem or item to highlight.
type ItemConverter ¶
ItemConverter converts a typed items slice and source file path into []map[string]any. The sourceFile parameter is the path stamped by StampSourceFile; when non-empty, the converter should include it as "_source_file" in each output map.
type MergeTracker ¶
type MergeTracker struct {
// contains filtered or unexported fields
}
MergeTracker deduplicates merge commits using a Bloom filter. It replaces the per-analyzer map[gitlib.Hash]bool pattern with a memory-efficient probabilistic structure.
A false positive (rate ≤ 0.1%) means a merge commit is incorrectly considered already-seen and skipped. At 0.1% over 1000 merges, the expected number of wrongly skipped merges is ~1.
func NewMergeTracker ¶
func NewMergeTracker() *MergeTracker
NewMergeTracker creates a new merge commit deduplication tracker.
func (*MergeTracker) MarshalBinary ¶
func (mt *MergeTracker) MarshalBinary() ([]byte, error)
MarshalBinary encodes the tracker state for checkpoint serialization.
func (*MergeTracker) Reset ¶
func (mt *MergeTracker) Reset()
Reset clears the tracker, allowing it to be reused for a new chunk.
func (*MergeTracker) SeenOrAdd ¶
func (mt *MergeTracker) SeenOrAdd(hash gitlib.Hash) bool
SeenOrAdd checks if a merge commit has been seen before and marks it as seen. Returns true if the commit was already seen (should be skipped).
func (*MergeTracker) UnmarshalBinary ¶
func (mt *MergeTracker) UnmarshalBinary(data []byte) error
UnmarshalBinary restores tracker state from checkpoint data.
type MergedCommitData ¶
type MergedCommitData struct {
Hash string `json:"hash"`
Timestamp string `json:"timestamp"`
Author string `json:"author"`
Tick int `json:"tick"`
Analyzers map[string]any `json:"-"`
}
MergedCommitData holds merged analyzer data for a single commit.
func (MergedCommitData) MarshalJSON ¶
func (m MergedCommitData) MarshalJSON() ([]byte, error)
MarshalJSON flattens commit metadata and per-analyzer data into a single object: {"hash": "...", "timestamp": "...", "author": "...", "tick": N, "quality": {...}, ...}.
type MergedTimeSeries ¶
type MergedTimeSeries struct {
Version string `json:"version"`
TickSizeHours float64 `json:"tick_size_hours"`
Analyzers []string `json:"analyzers"`
Commits []MergedCommitData `json:"commits"`
}
MergedTimeSeries is the top-level unified time-series output structure.
func BuildMergedTimeSeriesDirect ¶
func BuildMergedTimeSeriesDirect( active []AnalyzerData, commitMeta []CommitMeta, tickSizeHours float64, ) *MergedTimeSeries
BuildMergedTimeSeriesDirect builds a unified time-series from pre-extracted per-analyzer commit data. Callers collect AnalyzerData via CommitTimeSeriesProvider.ExtractCommitTimeSeries on each leaf analyzer.
type Metric ¶
type Metric struct {
Label string // Display label (e.g., "Total Functions").
Value string // Pre-formatted value (e.g., "156").
}
Metric represents a key-value metric for display.
type MetricComputer ¶
MetricComputer defines how raw report data is converted to typed metrics.
func SafeMetricComputer ¶
func SafeMetricComputer[M any](compute MetricComputer[M], empty M) MetricComputer[M]
SafeMetricComputer wraps a MetricComputer to return the empty value when the report is empty, avoiding nil-pointer panics or meaningless computation in downstream metric logic. Non-empty reports are forwarded to compute.
type MultiAnalyzerTraverser ¶
type MultiAnalyzerTraverser struct {
// contains filtered or unexported fields
}
MultiAnalyzerTraverser manages multiple visitors for UAST traversal. Uses iterative depth-first traversal to avoid stack overflow on deep trees and eliminate function call overhead from recursion.
func NewMultiAnalyzerTraverser ¶
func NewMultiAnalyzerTraverser() *MultiAnalyzerTraverser
NewMultiAnalyzerTraverser creates a new MultiAnalyzerTraverser.
func (*MultiAnalyzerTraverser) RegisterVisitor ¶
func (t *MultiAnalyzerTraverser) RegisterVisitor(v NodeVisitor)
RegisterVisitor registers a visitor to be called during traversal.
func (*MultiAnalyzerTraverser) Traverse ¶
func (t *MultiAnalyzerTraverser) Traverse(root *node.Node)
Traverse performs iterative pre/post-order traversal of the UAST tree. Each node receives OnEnter before its children and OnExit after all children have been fully traversed — matching the previous recursive semantics without risk of stack overflow.
type NDJSONLine ¶
type NDJSONLine struct {
Hash string `json:"hash"`
Tick int `json:"tick"`
AuthorID int `json:"author_id"`
Timestamp string `json:"timestamp"`
Analyzer string `json:"analyzer"`
Data any `json:"data"`
}
NDJSONLine is the JSON structure for one NDJSON output line.
type NodeVisitor ¶
NodeVisitor defines the interface for UAST visitors.
type Parallelizable ¶
type Parallelizable interface {
// SequentialOnly returns true if this analyzer cannot be parallelized
// (e.g. it tracks cumulative state across all commits).
SequentialOnly() bool
// CPUHeavy returns true if this analyzer's Consume() is CPU-intensive
// (e.g. UAST processing) and benefits from W parallel workers.
// Lightweight analyzers return false and run on the main goroutine
// to avoid fork/merge overhead.
CPUHeavy() bool
// SnapshotPlumbing captures the current plumbing output state.
// Called once per commit after core analyzers have run.
// The returned value is opaque to the framework.
SnapshotPlumbing() PlumbingSnapshot
// ApplySnapshot restores plumbing state from a previously captured snapshot.
// Called on forked copies before Consume().
ApplySnapshot(snapshot PlumbingSnapshot)
// ReleaseSnapshot releases any resources owned by the snapshot
// (e.g. UAST trees). Called once per snapshot after all leaves
// in the worker have consumed it.
ReleaseSnapshot(snapshot PlumbingSnapshot)
}
Parallelizable is optionally implemented by leaf analyzers that support parallel execution via the framework's Fork/Merge worker pool. The framework uses these methods instead of type-switching on concrete types.
type PlotRenderer ¶
type PlotRenderer func(model UnifiedModel, writer io.Writer) error
PlotRenderer is a function that renders a UnifiedModel as a plot to the given writer. It is provided by the renderer package to avoid import cycles.
type PlumbingSnapshot ¶
type PlumbingSnapshot any
PlumbingSnapshot is an opaque snapshot of plumbing state for one commit. The framework treats this as an opaque value; concrete snapshot types are defined in the plumbing package.
type PreparationConfig ¶
type PreparationConfig struct {
// Tick0 is the time of the first commit (for tick calculation).
Tick0 time.Time
// TickSize is the duration of one tick.
TickSize time.Duration
// PeopleDict maps author keys to author IDs.
PeopleDict map[string]int
}
PreparationConfig holds configuration for commit preparation.
type PreparedCommit ¶
type PreparedCommit struct {
Ctx *Context
Changes []*gitlib.Change
Cache map[gitlib.Hash]*gitlib.CachedBlob
FileDiffs map[string]pkgplumbing.FileDiffData
Index int
Err error
// AuthorID is the resolved author identifier.
AuthorID int
// Tick is the time tick for this commit.
Tick int
}
PreparedCommit holds all pre-computed data for a single commit. This is used by the pipelined runner to pass pre-fetched data to analyzers.
type Registry ¶
type Registry struct {
// contains filtered or unexported fields
}
Registry stores analyzer metadata with deterministic ordering.
func NewRegistry ¶
func NewRegistry(static []StaticAnalyzer, history []HistoryAnalyzer) (*Registry, error)
NewRegistry creates a registry from analyzer descriptors.
func (*Registry) All ¶
func (r *Registry) All() []Descriptor
All returns all descriptors in stable order.
func (*Registry) Descriptor ¶
func (r *Registry) Descriptor(id string) (Descriptor, bool)
Descriptor returns analyzer metadata for the given ID.
func (*Registry) ExpandPatterns ¶
ExpandPatterns expands glob patterns against registered analyzer IDs. Duplicate IDs across patterns are removed; first occurrence wins.
func (*Registry) IDsByMode ¶
func (r *Registry) IDsByMode(mode AnalyzerMode) []string
IDsByMode returns IDs for the given mode in stable order.
func (*Registry) SelectedIDs ¶
SelectedIDs returns the analyzer IDs for the given patterns, or all IDs if none specified.
type ReportMeta ¶
type ReportMeta struct {
AnalyzerID string `json:"analyzer_id"`
Version string `json:"version"`
SchemaHash string `json:"schema_hash"`
}
ReportMeta describes metadata for a single analyzer's report store entry.
type ReportReader ¶
type ReportReader interface {
// Meta returns the metadata for this analyzer's report.
Meta() ReportMeta
// Kinds returns the list of record kinds stored for this analyzer.
Kinds() []string
// Iter calls fn for each raw record of the given kind.
// Iteration stops early if fn returns a non-nil error.
Iter(kind string, fn func(raw []byte) error) error
// Close releases resources. Idempotent.
Close() error
}
ReportReader streams records for one analyzer. Memory footprint: one decoded record at a time.
type ReportSection ¶
type ReportSection interface {
// SectionTitle returns the display title (e.g., "COMPLEXITY").
SectionTitle() string
// Score returns a 0-1 score, or ScoreInfoOnly for info-only sections.
Score() float64
// ScoreLabel returns formatted score (e.g., "8/10" or "Info").
ScoreLabel() string
// StatusMessage returns a summary message (e.g., "Good - reasonable complexity").
StatusMessage() string
// KeyMetrics returns ordered key metrics for display.
KeyMetrics() []Metric
// Distribution returns distribution data for bar charts.
Distribution() []DistributionItem
// TopIssues returns the top N issues/items to highlight.
TopIssues(n int) []Issue
// AllIssues returns all issues for verbose mode.
AllIssues() []Issue
}
ReportSection provides a standardized structure for analyzer reports. Analyzers implement this to enable unified rendering.
type ReportSectionProvider ¶
type ReportSectionProvider interface {
CreateReportSection(report Report) ReportSection
}
ReportSectionProvider can create a ReportSection from report data. Analyzers implement this to enable executive summary generation.
type ReportStore ¶
type ReportStore interface {
// Begin starts writing records for the given analyzer.
Begin(analyzerID string, meta ReportMeta) (ReportWriter, error)
// Open opens a reader for the given analyzer's stored records.
Open(analyzerID string) (ReportReader, error)
// AnalyzerIDs returns the ordered list of analyzer IDs in the store.
AnalyzerIDs() []string
// Close releases store-level resources. Idempotent.
Close() error
}
ReportStore manages per-analyzer report artifacts. Writers and readers are created one at a time; no concurrent access.
type ReportWriter ¶
type ReportWriter interface {
// Write appends one typed record under the given kind.
Write(kind string, record any) error
// Close finalizes the write. After Close, the data is durable.
// Idempotent: second call is a no-op.
Close() error
}
ReportWriter appends typed records for one analyzer. Data becomes visible only after Close completes successfully.
type ResultAggregator ¶
ResultAggregator defines the interface for aggregating analyzer results.
type SectionRendererFunc ¶
SectionRendererFunc generates plot sections from a raw report for a specific analyzer.
func PlotSectionsFor ¶
func PlotSectionsFor(analyzerID string) SectionRendererFunc
PlotSectionsFor returns the registered section renderer for an analyzer ID, or nil.
type SpillThresholdSetter ¶
type SpillThresholdSetter interface {
SetSpillThreshold(threshold int)
}
SpillThresholdSetter is implemented by aggregators that support configurable spill-to-disk thresholds. Used by StaticService to apply budget-derived thresholds.
type SpilledUASTRecord ¶
SpilledUASTRecord is the gob-serialized record for one UAST file change. ChangeIndex references into the CommitData.Changes slice to reconstruct the *gitlib.Change pointer on deserialization.
type StateSizer ¶
type StateSizer interface {
EstimatedStateSize() int64
}
StateSizer is implemented by aggregators that can estimate their in-memory state size. Used by StaticService to log aggregator memory usage at pipeline milestones.
type StaticAnalyzer ¶
type StaticAnalyzer interface {
Analyzer
Analyze(root *node.Node) (Report, error)
Thresholds() Thresholds
// Aggregation methods.
CreateAggregator() ResultAggregator
// Formatting methods.
FormatReport(report Report, writer io.Writer) error
FormatReportJSON(report Report, writer io.Writer) error
FormatReportYAML(report Report, writer io.Writer) error
FormatReportPlot(report Report, writer io.Writer) error
FormatReportBinary(report Report, writer io.Writer) error
}
StaticAnalyzer interface defines the contract for UAST-based static analysis.
type StaticProgressEvent ¶
type StaticProgressEvent struct {
FilesProcessed int64
RSSBytes int64
AggregatorSize int64
Phase string
}
StaticProgressEvent represents a static analysis progress milestone.
type StaticProgressFunc ¶
type StaticProgressFunc func(event StaticProgressEvent)
StaticProgressFunc is called at key pipeline milestones.
type StaticRenderer ¶
type StaticRenderer interface {
// SectionsToJSON converts report sections to a JSON-serializable value.
SectionsToJSON(sections []ReportSection) any
// RenderText writes human-readable text output for the given sections.
RenderText(sections []ReportSection, verbose, noColor bool, writer io.Writer) error
// RenderCompact writes single-line-per-section compact output.
RenderCompact(sections []ReportSection, noColor bool, writer io.Writer) error
}
StaticRenderer abstracts section-based rendering to avoid import cycles between the analyze and renderer packages. The renderer package provides the production implementation.
type StaticService ¶
type StaticService struct {
Analyzers []StaticAnalyzer
// MaxWorkers limits the number of concurrent file analysis goroutines.
// Zero means use min(runtime.NumCPU(), DefaultStaticMaxWorkers).
MaxWorkers int
// MallocTrimInterval is the number of files between native memory trim calls.
// Zero means use DefaultMallocTrimInterval. Negative disables trimming.
MallocTrimInterval int
// NativeMemoryReleaseFn is called periodically to release native memory.
// Defaults to gitlib.ReleaseNativeMemory when nil.
NativeMemoryReleaseFn func()
// AggregationMode controls whether per-item data is collected during aggregation.
// Full (default) collects all data. SummaryOnly skips per-item collection.
AggregationMode AggregationMode
// SpillThreshold overrides the default spill-to-disk threshold on aggregators.
// Zero means use the aggregator default. Derived from --memory-budget.
SpillThreshold int
// ProgressFunc is called at pipeline milestones when non-nil.
// Called every ProgressInterval files during processing, and once after completion.
ProgressFunc StaticProgressFunc
// ProgressInterval is the number of files between progress callbacks.
// Zero means use DefaultProgressInterval.
ProgressInterval int
// Renderer provides section-based output rendering.
// Must be set before calling FormatJSON, FormatText, FormatCompact, or RunAndFormat.
Renderer StaticRenderer
}
StaticService provides a high-level interface for running static analysis.
func NewStaticService ¶
func NewStaticService(analyzers []StaticAnalyzer) *StaticService
NewStaticService creates a StaticService with the given analyzers.
func (*StaticService) AnalyzeFolder ¶
func (svc *StaticService) AnalyzeFolder(ctx context.Context, rootPath string, analyzerList []string) (map[string]Report, error)
AnalyzeFolder runs static analyzers for supported files in a folder tree. File discovery streams paths to workers via a channel, providing natural backpressure.
func (*StaticService) AnalyzerNamesByID ¶
func (svc *StaticService) AnalyzerNamesByID(ids []string) ([]string, error)
AnalyzerNamesByID resolves analyzer descriptor IDs to internal names.
func (*StaticService) BuildSections ¶
func (svc *StaticService) BuildSections(results map[string]Report) []ReportSection
BuildSections creates ReportSection instances from results in deterministic order.
func (*StaticService) FindAnalyzer ¶
func (svc *StaticService) FindAnalyzer(name string) StaticAnalyzer
FindAnalyzer finds an analyzer by name.
func (*StaticService) FormatCompact ¶
func (svc *StaticService) FormatCompact(results map[string]Report, noColor bool, writer io.Writer) error
FormatCompact renders analysis results as single-line-per-analyzer compact output.
func (*StaticService) FormatJSON ¶
FormatJSON encodes analysis results as indented JSON.
func (*StaticService) FormatPerAnalyzer ¶
func (svc *StaticService) FormatPerAnalyzer( analyzerNames []string, results map[string]Report, format string, writer io.Writer, ) error
FormatPerAnalyzer renders results using per-analyzer formatters (YAML, plot, or binary).
func (*StaticService) FormatPlotPages ¶
func (svc *StaticService) FormatPlotPages( analyzerNames []string, results map[string]Report, outputDir string, ) error
FormatPlotPages renders multi-page HTML plot output to outputDir. Each analyzer gets its own HTML page plus an index page with navigation. FRD: specs/frds/FRD-20260312-static-plot-multipage.md.
func (*StaticService) FormatText ¶
func (svc *StaticService) FormatText(results map[string]Report, verbose, noColor bool, writer io.Writer) error
FormatText renders analysis results as human-readable text with optional color and verbosity.
func (*StaticService) RenderPlotPages ¶
func (svc *StaticService) RenderPlotPages( analyzerNames []string, results map[string]Report, outputDir string, ) ([]plotpage.PageMeta, error)
RenderPlotPages renders per-analyzer HTML pages to outputDir without an index. Returns page metadata for later index rendering.
func (*StaticService) ResolveMallocTrimInterval ¶
func (svc *StaticService) ResolveMallocTrimInterval() int
ResolveMallocTrimInterval returns the effective trim interval. Zero resolves to DefaultMallocTrimInterval. Negative means disabled (returns -1).
func (*StaticService) ResolveMaxWorkers ¶
func (svc *StaticService) ResolveMaxWorkers() int
ResolveMaxWorkers returns the effective worker count for parallel file analysis. Zero resolves to min(runtime.NumCPU(), DefaultStaticMaxWorkers).
type StoreSectionRendererFunc ¶
type StoreSectionRendererFunc func(reader ReportReader) ([]plotpage.Section, error)
StoreSectionRendererFunc renders plot sections from a ReportReader. Used by analyzers that implement DirectStoreWriter and emit structured kinds instead of a monolithic "report" gob record.
func StorePlotSectionsFor ¶
func StorePlotSectionsFor(analyzerID string) StoreSectionRendererFunc
StorePlotSectionsFor returns the registered store section renderer for an analyzer ID, or nil.
type StoreWriter ¶
type StoreWriter interface {
WriteToStore(ctx context.Context, ticks []TICK, w ReportWriter) error
}
StoreWriter is optionally implemented by HistoryAnalyzers that can write chunked records directly to a ReportWriter, bypassing monolithic Report maps. Analyzers that implement this interface stream records one-at-a-time to the store during FinalizeToStore, keeping memory bounded.
type StreamingSink ¶
type StreamingSink struct {
// contains filtered or unexported fields
}
StreamingSink writes one NDJSON line per TC to an io.Writer. Thread-safe: concurrent WriteTC calls are serialized via a mutex.
func NewStreamingSink ¶
func NewStreamingSink(w io.Writer) *StreamingSink
NewStreamingSink creates a StreamingSink that writes to the given writer.
type TC ¶
type TC struct {
// CommitHash identifies the analyzed commit.
CommitHash gitlib.Hash
// Tick is the time-bucket index this commit belongs to.
Tick int
// AuthorID is the numeric identity of the commit author.
AuthorID int
// Timestamp is the commit's author time.
Timestamp time.Time
// Data carries the analyzer-specific per-commit payload.
// The concrete type is defined by each analyzer.
Data any
}
TC is a per-commit result emitted by a HistoryAnalyzer. Each Consume() call produces one TC representing the analyzer's output for that commit. Data holds an analyzer-specific payload; concrete types are documented per-analyzer.
type TCSink ¶
TCSink is a callback that receives stamped TCs during pipeline execution. Used by the NDJSON streaming output to write one JSON line per TC.
type TICK ¶
type TICK struct {
// Tick is the time-bucket index.
Tick int
// StartTime is the earliest commit timestamp in this tick.
StartTime time.Time
// EndTime is the latest commit timestamp in this tick.
EndTime time.Time
// Data carries the analyzer-specific aggregated payload.
// The concrete type is defined by each aggregator.
Data any
}
TICK is an aggregated tick-level result produced by an Aggregator. It represents the merged output of all TCs within one time bucket. Data holds an analyzer-specific aggregated payload.
type Thresholds ¶
Thresholds represents color-coded thresholds for multiple metrics. Structure: {"metric_name": {"red": value, "yellow": value, "green": value}}.
type TimeSeriesChunkFlusher ¶
type TimeSeriesChunkFlusher struct {
// contains filtered or unexported fields
}
TimeSeriesChunkFlusher drains per-commit data from aggregators after each chunk and writes NDJSON lines. This keeps memory bounded to O(chunk_size) instead of O(total_commits) for large repositories.
func NewTimeSeriesChunkFlusher ¶
func NewTimeSeriesChunkFlusher(writer io.Writer, leaves []HistoryAnalyzer) *TimeSeriesChunkFlusher
NewTimeSeriesChunkFlusher creates a flusher that writes NDJSON lines to writer. leaves must be in the same order as the aggregators returned by runner.LeafAggregators().
func (*TimeSeriesChunkFlusher) Flush ¶
func (f *TimeSeriesChunkFlusher) Flush( aggregators []Aggregator, commitMeta map[string]CommitMeta, ) (int, error)
Flush drains per-commit data from all aggregators, merges with commit metadata, and writes NDJSON lines. Returns the number of commits flushed. aggregators must be in the same order as the leaves passed to the constructor.
type TypedCollection ¶
type TypedCollection struct {
Items any // concrete typed slice (e.g., []FunctionMetrics).
SourceFile string // stamped by StampSourceFile.
ToMaps ItemConverter // deferred converter.
}
TypedCollection wraps a typed struct slice for deferred map conversion. Per-file analyzers place a TypedCollection in the report instead of []map[string]any. Conversion to maps is deferred to the serialization boundary (e.g., AddToResult).
func (TypedCollection) MapSlice ¶
func (tc TypedCollection) MapSlice() []map[string]any
MapSlice converts the typed items to []map[string]any using the stored converter.
type UnifiedModel ¶
type UnifiedModel struct {
Version string `json:"version" yaml:"version"`
Analyzers []AnalyzerResult `json:"analyzers" yaml:"analyzers"`
}
UnifiedModel is the canonical intermediate model for run output conversion.
func DecodeBinaryInputModel ¶
func DecodeBinaryInputModel(input []byte) (UnifiedModel, error)
DecodeBinaryInputModel decodes a single binary envelope containing canonical unified JSON.
func DecodeCombinedBinaryReports ¶
func DecodeCombinedBinaryReports(input []byte, ids []string, modes []AnalyzerMode) (UnifiedModel, error)
DecodeCombinedBinaryReports decodes multiple binary envelopes, each containing a raw Report JSON, and pairs them positionally with the provided analyzer IDs and modes to build a UnifiedModel. This is used by the combined static+history rendering path where each phase serializes its Reports as separate envelopes.
func DecodeInputModel ¶
func DecodeInputModel( input []byte, inputFormat string, ) (UnifiedModel, error)
DecodeInputModel dispatches input decoding based on the inputFormat.
func ParseUnifiedModelJSON ¶
func ParseUnifiedModelJSON(data []byte) (UnifiedModel, error)
ParseUnifiedModelJSON parses canonical JSON into UnifiedModel.
func (UnifiedModel) Validate ¶
func (m UnifiedModel) Validate() error
Validate ensures canonical model constraints are satisfied.
type VisitorProvider ¶
type VisitorProvider interface {
CreateVisitor() AnalysisVisitor
}
VisitorProvider enables single-pass traversal optimization.
Source Files
¶
- aggregation_mode.go
- aggregator.go
- analyzer.go
- base_history.go
- commits_by_tick.go
- conversion.go
- descriptor.go
- formats.go
- generic_aggregator.go
- history.go
- merge_tracker.go
- metrics_safe.go
- multi_traverser.go
- output.go
- prepared.go
- record_reader.go
- record_writer.go
- registry.go
- report_section.go
- report_store.go
- report_store_file.go
- static.go
- streaming_sink.go
- tc.go
- timeseries.go
- timeseries_sink.go
- typed_collection.go
- uast_spill.go
- visitor.go