Documentation
¶
Overview ¶
Package analysis provides file discovery and metadata generation for the SAIST engine. The FileDiscoverer handles scanning directories, filtering files, computing hashes, and detecting programming languages for security analysis.
Package analysis also provides rule processing and LLM-based security analysis for the SAIST engine. The RuleProcessor handles rule matching, AI detection calls, and result processing for individual files and security rules.
Index ¶
- Constants
- Variables
- func CalculateFileHashFromBytes(content []byte) string
- func ContextWithShimmedLogger(ctx context.Context, l log.DDSourceLogger) context.Context
- func IsGeneratedFile(path string, language model.Language) (bool, error)
- func IsGeneratedFileByContent(content []byte, path string, language model.Language) bool
- func IsGeneratedFileByPath(path string) bool
- func IsGeneratedFileFromContent(fullContent []byte, path string, language model.Language) bool
- func IsTestFile(path string, language model.Language) (bool, error)
- func IsTestFileByContent(content []byte, path string, language model.Language) bool
- func IsTestFileByPath(path string, language model.Language) bool
- func IsTestFileFromContent(content []byte, path string, language model.Language) bool
- func ShouldIgnorePath(path string) bool
- type AnalysisSummary
- type FileDiscoverer
- type ProcessFileResult
- type ResultAggregator
- type RuleIndex
- type RuleProcessor
- func (rp *RuleProcessor) BuildScanDataForResult(ctx context.Context, result *ProcessFileResult) error
- func (rp *RuleProcessor) GetApplicableRules(fm fileMeta) []*api.AiPrompt
- func (rp *RuleProcessor) ProcessFileRulesBatched(files []fileMeta) ([]ProcessFileResult, error)
- func (rp *RuleProcessor) RunScans(ctx context.Context, scanDataList []model.ScanData) (RunScansResult, error)
- type RunScanResult
- type RunScansResult
- type Violation
Constants ¶
const (
ProtoBufHeader = "Generated by the protocol buffer compiler. DO NOT EDIT!"
ThriftHeader = "Autogenerated by Thrift Compiler"
GeneratedByMarker = "Code generated by"
// Max number of characters we use at the file header to detect if this is a generated file.
MaxHeaderSize = 400
)
const BatchConcurrency = 4
BatchConcurrency is the number of parallel file processing batches.
const BatchSize = 50
BatchSize is the number of files to process in a single batch.
const PromptDebugFileCreationMode = 0600
Variables ¶
var DefaultIgnoredGlobs = []string{
"**/node_modules/**/*",
"**/jspm_packages/**/*",
"**/.next/**/*",
"**/.vuepress/**/*",
"**/venv/**/*",
"**/__pycache__/**/*",
"**/_vendor/bundle/ruby/**/*",
"**/.vendor/bundle/ruby/**/*",
"**/.bundle/**/*",
"**/.gradle/**/*",
"**/TemporaryGeneratedFile_.*.cs",
"**/*.designer.cs",
"**/*.generated.cs",
"**/*.g.cs",
"**/*.g.i.cs",
"**/*.min.js",
"**/dist/**/*.js",
"**/build/**/*.js",
"**/*.d.ts",
"**/dist/**/*.ts",
"**/build/**/*.ts",
}
Functions ¶
func CalculateFileHashFromBytes ¶
CalculateFileHashFromBytes calculates the SHA-256 hash of already-loaded content.
func ContextWithShimmedLogger ¶
ContextWithShimmedLogger returns a context using the provided logger.
func IsGeneratedFile ¶
IsGeneratedFile returns true if a file is likely generated based on simple header heuristics. We only look at the first MaxHeaderSize bytes of the file.
func IsGeneratedFileByContent ¶
IsGeneratedFileByContent checks if a file is generated based on content headers. Use this after file content is loaded.
func IsGeneratedFileByPath ¶
IsGeneratedFileByPath checks if a file is generated based only on path/suffix (no content needed). Use this for early filtering before content is available.
func IsGeneratedFileFromContent ¶
IsGeneratedFileFromContent checks if content is generated without reading from disk. Use this when file content is already loaded to avoid redundant I/O.
func IsTestFile ¶
IsTestFile returns true if the file is likely a test file (or test-related helper) for the given language, based on its path and imports.
func IsTestFileByContent ¶
IsTestFileByContent checks if a file is a test file based on content (imports). Use this after file content is loaded.
func IsTestFileByPath ¶
IsTestFileByPath checks if a file is a test file based only on path patterns (no content needed). Use this for early filtering before content is available.
func IsTestFileFromContent ¶
IsTestFileFromContent checks if content is a test file without reading from disk. Use this when file content is already loaded to avoid redundant I/O.
func ShouldIgnorePath ¶
Types ¶
type AnalysisSummary ¶
type AnalysisSummary struct {
FilesAnalyzed []string
Rules []modelApi.AiPrompt
Violations []Violation
}
AnalysisSummary contains the results and metadata from running analysis.
func RunAnalysis ¶
func RunAnalysis(ctx context.Context, directory string, detectionModelStr, validationModelStr, output string, debug bool, baseURL string, requestTimeoutSec, fileConcurrency int, writePrompts, isAIGateway, aiGuardEnabled bool, apiKey string, jwtToken string, orgID int64, repositoryID string, useLocalPrompts bool, skipIndexing bool) (AnalysisSummary, error)
RunAnalysis is the main public API function that runs analysis.
type FileDiscoverer ¶
type FileDiscoverer struct {
// contains filtered or unexported fields
}
FileDiscoverer handles file discovery and metadata generation.
func NewFileDiscoverer ¶
func NewFileDiscoverer(directory string, debug bool) *FileDiscoverer
NewFileDiscoverer creates a new file discoverer.
func (*FileDiscoverer) DiscoverFiles ¶
func (fd *FileDiscoverer) DiscoverFiles(ctx context.Context) ([]fileMeta, error)
DiscoverFiles finds all analyzable files in the directory and returns metadata. It uses two phases: a fast directory walk (no file-content I/O), then parallel file reading and hashing.
type ProcessFileResult ¶
type ResultAggregator ¶
type ResultAggregator struct {
// contains filtered or unexported fields
}
ResultAggregator handles result collection with a single mutex.
func (*ResultAggregator) Finalize ¶
func (w *ResultAggregator) Finalize() error
Finalize writes the complete SARIF report.
func (*ResultAggregator) GetSummary ¶
func (w *ResultAggregator) GetSummary() (violationCount, filesAnalyzed int, inputTokens, outputTokens, llmCalls int32)
GetSummary returns the current processing summary.
func (*ResultAggregator) ProcessResults ¶
func (w *ResultAggregator) ProcessResults(fileResults []model.FileResult, violations []model.Violation, inputTokens, outputTokens, llmCalls int32)
ProcessResults processes file results. It is thread-safe.
type RuleIndex ¶
type RuleIndex struct {
// contains filtered or unexported fields
}
RuleIndex pre-computes rule-language mappings for efficient lookup.
type RuleProcessor ¶
type RuleProcessor struct {
// contains filtered or unexported fields
}
RuleProcessor handles per-file rule processing logic.
func NewRuleProcessor ¶
func NewRuleProcessor(agent *agents.DetectionAgent, opts *model.AnalysisOptions, aiContext *model.AiContextProject) (*RuleProcessor, error)
NewRuleProcessor creates a new rule processor.
func (*RuleProcessor) BuildScanDataForResult ¶
func (rp *RuleProcessor) BuildScanDataForResult(ctx context.Context, result *ProcessFileResult) error
BuildScanDataForResult reads file content and builds ScanData for rules that apply to this file. This should be called after ProcessFileRulesBatched for results that have applicableRules.
func (*RuleProcessor) GetApplicableRules ¶
func (rp *RuleProcessor) GetApplicableRules(fm fileMeta) []*api.AiPrompt
GetApplicableRules returns the rules that apply to the given file. It is used as a fallback on errors.
func (*RuleProcessor) ProcessFileRulesBatched ¶
func (rp *RuleProcessor) ProcessFileRulesBatched(files []fileMeta) ([]ProcessFileResult, error)
ProcessFileRulesBatched processes multiple files at once, returning all applicable rules for scanning.
func (*RuleProcessor) RunScans ¶
func (rp *RuleProcessor) RunScans(ctx context.Context, scanDataList []model.ScanData) (RunScansResult, error)
RunScans runs and returns metrics for the provided list of ScanData. For accurate metrics, the caller should ensure that all ScanData are for the same file.