analysis

package
v0.0.17 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jun 10, 2026 License: Apache-2.0 Imports: 33 Imported by: 0

Documentation

Overview

Package analysis provides file discovery and metadata generation for the SAIST engine. The FileDiscoverer handles scanning directories, filtering files, computing hashes, and detecting programming languages for security analysis.

Package analysis provides rule processing and LLM-based security analysis for the SAIST engine. The RuleProcessor handles rule matching, AI detection calls, and result processing for individual files and security rules.

Index

Constants

View Source
const (
	ProtoBufHeader    = "Generated by the protocol buffer compiler.  DO NOT EDIT!"
	ThriftHeader      = "Autogenerated by Thrift Compiler"
	GeneratedByMarker = "Code generated by"

	// Max number of characters we use at the file header to detect if this is a generated file.
	MaxHeaderSize = 400
)
View Source
const BatchConcurrency = 4

BatchConcurrency is the number of parallel file processing batches

View Source
const BatchSize = 50

BatchSize is the number of files to process in a single batch

View Source
const PromptDebugFileCreationMode = 0600

Variables

View Source
var DefaultIgnoredGlobs = []string{
	"**/node_modules/**/*",
	"**/jspm_packages/**/*",
	"**/.next/**/*",
	"**/.vuepress/**/*",
	"**/venv/**/*",
	"**/__pycache__/**/*",
	"**/_vendor/bundle/ruby/**/*",
	"**/.vendor/bundle/ruby/**/*",
	"**/.bundle/**/*",
	"**/.gradle/**/*",
	"**/TemporaryGeneratedFile_.*.cs",
	"**/*.designer.cs",
	"**/*.generated.cs",
	"**/*.g.cs",
	"**/*.g.i.cs",
	"**/*.min.js",
	"**/dist/**/*.js",
	"**/build/**/*.js",
	"**/*.d.ts",
	"**/dist/**/*.ts",
	"**/build/**/*.ts",
}

Functions

func CalculateFileHashFromBytes

func CalculateFileHashFromBytes(content []byte) string

CalculateFileHashFromBytes calculates the SHA256 hash from already-loaded content

func ContextWithShimmedLogger

func ContextWithShimmedLogger(ctx context.Context, l log.DDSourceLogger) context.Context

ContextWithShimmedLogger returns a context using the provided logger.

func IsGeneratedFile

func IsGeneratedFile(path string, language model.Language) (bool, error)

IsGeneratedFile returns true if a file is likely generated based on simple header heuristics. We only look at the first MaxHeaderSize bytes of the file.

func IsGeneratedFileByContent

func IsGeneratedFileByContent(content []byte, path string, language model.Language) bool

IsGeneratedFileByContent checks if a file is generated based on content headers. Use this after file content is loaded.

func IsGeneratedFileByPath

func IsGeneratedFileByPath(path string) bool

IsGeneratedFileByPath checks if a file is generated based only on path/suffix (no content needed). Use this for early filtering before content is available.

func IsGeneratedFileFromContent

func IsGeneratedFileFromContent(fullContent []byte, path string, language model.Language) bool

IsGeneratedFileFromContent checks if content is generated without reading from disk. Use this when file content is already loaded to avoid redundant I/O.

func IsTestFile

func IsTestFile(path string, language model.Language) (bool, error)

IsTestFile returns true if the file is likely a test file (or test-related helper) for the given language, based on its path and imports.

func IsTestFileByContent

func IsTestFileByContent(content []byte, path string, language model.Language) bool

IsTestFileByContent checks if a file is a test file based on content (imports). Use this after file content is loaded.

func IsTestFileByPath

func IsTestFileByPath(path string, language model.Language) bool

IsTestFileByPath checks if a file is a test file based only on path patterns (no content needed). Use this for early filtering before content is available.

func IsTestFileFromContent

func IsTestFileFromContent(content []byte, path string, language model.Language) bool

IsTestFileFromContent checks if content is a test file without reading from disk. Use this when file content is already loaded to avoid redundant I/O.

func ShouldIgnorePath

func ShouldIgnorePath(path string) bool

Types

type AnalysisSummary

type AnalysisSummary struct {
	FilesAnalyzed []string
	Rules         []modelApi.AiPrompt
	Violations    []Violation
}

AnalysisSummary contains the results and metadata from running analysis

func RunAnalysis

func RunAnalysis(ctx context.Context, directory string, detectionModelStr, validationModelStr, output string,
	debug bool, baseURL string, requestTimeoutSec, fileConcurrency int, writePrompts, isAIGateway,
	aiGuardEnabled bool, apiKey string, jwtToken string, orgID int64, repositoryID string,
	useLocalPrompts bool, skipIndexing bool) (AnalysisSummary, error)

RunAnalysis is the main public API function that runs analysis

type FileDiscoverer

type FileDiscoverer struct {
	// contains filtered or unexported fields
}

FileDiscoverer handles file discovery and metadata generation

func NewFileDiscoverer

func NewFileDiscoverer(directory string, debug bool) *FileDiscoverer

NewFileDiscoverer creates a new file discoverer

func (*FileDiscoverer) DiscoverFiles

func (fd *FileDiscoverer) DiscoverFiles(ctx context.Context) ([]fileMeta, error)

DiscoverFiles finds all analyzable files in the directory and returns metadata. It works in two phases: a fast directory walk that reads no file contents, followed by parallel file reading and hashing.

type ProcessFileResult

type ProcessFileResult struct {
	// RelPath is the relative path of the file processed.
	RelPath string
	// Scans contains scans that need to be executed for this file.
	// If empty, no rules apply to this file.
	Scans []model.ScanData
	// contains filtered or unexported fields
}

type ResultAggregator

type ResultAggregator struct {
	// contains filtered or unexported fields
}

ResultAggregator handles result collection with a single mutex.

func (*ResultAggregator) Finalize

func (w *ResultAggregator) Finalize() error

Finalize writes the complete SARIF report

func (*ResultAggregator) GetSummary

func (w *ResultAggregator) GetSummary() (violationCount, filesAnalyzed int, inputTokens, outputTokens, llmCalls int32)

GetSummary returns current processing summary

func (*ResultAggregator) ProcessResults

func (w *ResultAggregator) ProcessResults(fileResults []model.FileResult, violations []model.Violation,
	inputTokens, outputTokens, llmCalls int32)

ProcessResults processes file results (thread-safe)

type RuleIndex

type RuleIndex struct {
	// contains filtered or unexported fields
}

RuleIndex pre-computes rule-language mappings for efficient lookup

type RuleProcessor

type RuleProcessor struct {
	// contains filtered or unexported fields
}

RuleProcessor handles per-file rule processing logic

func NewRuleProcessor

func NewRuleProcessor(agent *agents.DetectionAgent, opts *model.AnalysisOptions,
	aiContext *model.AiContextProject) (*RuleProcessor, error)

NewRuleProcessor creates a new rule processor

func (*RuleProcessor) BuildScanDataForResult

func (rp *RuleProcessor) BuildScanDataForResult(ctx context.Context, result *ProcessFileResult) error

BuildScanDataForResult reads file content and builds ScanData for rules that apply to this file. This should be called after ProcessFileRulesBatched for results that have applicableRules.

func (*RuleProcessor) GetApplicableRules

func (rp *RuleProcessor) GetApplicableRules(fm fileMeta) []*api.AiPrompt

GetApplicableRules returns rules that apply to the given file (used for fallback on errors)

func (*RuleProcessor) ProcessFileRulesBatched

func (rp *RuleProcessor) ProcessFileRulesBatched(files []fileMeta) ([]ProcessFileResult, error)

ProcessFileRulesBatched processes multiple files at once, returning all applicable rules for scanning.

func (*RuleProcessor) RunScans

func (rp *RuleProcessor) RunScans(ctx context.Context, scanDataList []model.ScanData) (RunScansResult, error)

RunScans runs and returns metrics for the provided list of ScanData. For accurate metrics, the caller should ensure that all ScanData are for the same file.

type RunScanResult

type RunScanResult struct {
	Violations       []model.Violation
	FileInputTokens  int32
	FileOutputTokens int32
}

type RunScansResult

type RunScansResult struct {
	RulesSuccess     []string
	RulesFailed      []string
	Violations       []model.Violation
	FileInputTokens  int32
	FileOutputTokens int32
	FileLLMCalls     int32
}

type Violation

type Violation = model.Violation

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL