analysis

package

v0.0.17 (Latest) · Published: Jun 10, 2026 · License: Apache-2.0 · Imports: 33 · Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/DataDog/datadog-saist

Links

Open Source Insights

Documentation ¶

Overview ¶

Package analysis provides file discovery and metadata generation for the SAIST engine. The FileDiscoverer handles scanning directories, filtering files, computing hashes, and detecting programming languages for security analysis.

The package also provides rule processing and LLM-based security analysis for the SAIST engine. The RuleProcessor handles rule matching, AI detection calls, and result processing for individual files and security rules.

Index ¶

Constants
Variables
func CalculateFileHashFromBytes(content []byte) string
func ContextWithShimmedLogger(ctx context.Context, l log.DDSourceLogger) context.Context
func IsGeneratedFile(path string, language model.Language) (bool, error)
func IsGeneratedFileByContent(content []byte, path string, language model.Language) bool
func IsGeneratedFileByPath(path string) bool
func IsGeneratedFileFromContent(fullContent []byte, path string, language model.Language) bool
func IsTestFile(path string, language model.Language) (bool, error)
func IsTestFileByContent(content []byte, path string, language model.Language) bool
func IsTestFileByPath(path string, language model.Language) bool
func IsTestFileFromContent(content []byte, path string, language model.Language) bool
func ShouldIgnorePath(path string) bool
type AnalysisSummary
- func RunAnalysis(ctx context.Context, directory string, ...) (AnalysisSummary, error)
type FileDiscoverer
- func NewFileDiscoverer(directory string, debug bool) *FileDiscoverer
- func (fd *FileDiscoverer) DiscoverFiles(ctx context.Context) ([]fileMeta, error)
type ProcessFileResult
type ResultAggregator
- func (w *ResultAggregator) Finalize() error
- func (w *ResultAggregator) GetSummary() (violationCount, filesAnalyzed int, inputTokens, outputTokens, llmCalls int32)
- func (w *ResultAggregator) ProcessResults(fileResults []model.FileResult, violations []model.Violation, ...)
type RuleIndex
type RuleProcessor
- func NewRuleProcessor(agent *agents.DetectionAgent, opts *model.AnalysisOptions, ...) (*RuleProcessor, error)
- func (rp *RuleProcessor) BuildScanDataForResult(ctx context.Context, result *ProcessFileResult) error
- func (rp *RuleProcessor) GetApplicableRules(fm fileMeta) []*api.AiPrompt
- func (rp *RuleProcessor) ProcessFileRulesBatched(files []fileMeta) ([]ProcessFileResult, error)
- func (rp *RuleProcessor) RunScans(ctx context.Context, scanDataList []model.ScanData) (RunScansResult, error)
type RunScanResult
type RunScansResult
type Violation

Constants ¶

View Source

const (
	ProtoBufHeader    = "Generated by the protocol buffer compiler.  DO NOT EDIT!"
	ThriftHeader      = "Autogenerated by Thrift Compiler"
	GeneratedByMarker = "Code generated by"

	// Max number of characters we use at the file header to detect if this is a generated file.
	MaxHeaderSize = 400
)

View Source

const BatchConcurrency = 4

BatchConcurrency is the number of file-processing batches that run in parallel.

View Source

const BatchSize = 50

BatchSize is the number of files to process in a single batch

View Source

const PromptDebugFileCreationMode = 0600

Variables ¶

View Source

var DefaultIgnoredGlobs = []string{
	"**/node_modules/**/*",
	"**/jspm_packages/**/*",
	"**/.next/**/*",
	"**/.vuepress/**/*",
	"**/venv/**/*",
	"**/__pycache__/**/*",
	"**/_vendor/bundle/ruby/**/*",
	"**/.vendor/bundle/ruby/**/*",
	"**/.bundle/**/*",
	"**/.gradle/**/*",
	"**/TemporaryGeneratedFile_.*.cs",
	"**/*.designer.cs",
	"**/*.generated.cs",
	"**/*.g.cs",
	"**/*.g.i.cs",
	"**/*.min.js",
	"**/dist/**/*.js",
	"**/build/**/*.js",
	"**/*.d.ts",
	"**/dist/**/*.ts",
	"**/build/**/*.ts",
}

Functions ¶

func CalculateFileHashFromBytes ¶

func CalculateFileHashFromBytes(content []byte) string

CalculateFileHashFromBytes calculates the SHA256 hash from already-loaded content

func ContextWithShimmedLogger ¶

func ContextWithShimmedLogger(ctx context.Context, l log.DDSourceLogger) context.Context

ContextWithShimmedLogger returns a context using the provided logger.

func IsGeneratedFile ¶

func IsGeneratedFile(path string, language model.Language) (bool, error)

IsGeneratedFile returns true if a file is likely generated based on simple header heuristics. We only look at the first MaxHeaderSize bytes of the file.

func IsGeneratedFileByContent ¶

func IsGeneratedFileByContent(content []byte, path string, language model.Language) bool

IsGeneratedFileByContent checks if a file is generated based on content headers. Use this after file content is loaded.

func IsGeneratedFileByPath ¶

func IsGeneratedFileByPath(path string) bool

IsGeneratedFileByPath checks if a file is generated based only on path/suffix (no content needed). Use this for early filtering before content is available.

func IsGeneratedFileFromContent ¶

func IsGeneratedFileFromContent(fullContent []byte, path string, language model.Language) bool

IsGeneratedFileFromContent checks if content is generated without reading from disk. Use this when file content is already loaded to avoid redundant I/O.

func IsTestFile ¶

func IsTestFile(path string, language model.Language) (bool, error)

IsTestFile returns true if the file is likely a test file (or test-related helper) for the given language, based on its path and imports.

func IsTestFileByContent ¶

func IsTestFileByContent(content []byte, path string, language model.Language) bool

IsTestFileByContent checks if a file is a test file based on content (imports). Use this after file content is loaded.

func IsTestFileByPath ¶

func IsTestFileByPath(path string, language model.Language) bool

IsTestFileByPath checks if a file is a test file based only on path patterns (no content needed). Use this for early filtering before content is available.

func IsTestFileFromContent ¶

func IsTestFileFromContent(content []byte, path string, language model.Language) bool

IsTestFileFromContent checks if content is a test file without reading from disk. Use this when file content is already loaded to avoid redundant I/O.

func ShouldIgnorePath ¶

func ShouldIgnorePath(path string) bool

Types ¶

type AnalysisSummary ¶

type AnalysisSummary struct {
	FilesAnalyzed []string
	Rules         []modelApi.AiPrompt
	Violations    []Violation
}

AnalysisSummary contains the results and metadata from running analysis

func RunAnalysis ¶

func RunAnalysis(ctx context.Context, directory string, detectionModelStr, validationModelStr, output string,
	debug bool, baseURL string, requestTimeoutSec, fileConcurrency int, writePrompts, isAIGateway,
	aiGuardEnabled bool, apiKey string, jwtToken string, orgID int64, repositoryID string,
	useLocalPrompts bool, skipIndexing bool) (AnalysisSummary, error)

RunAnalysis is the main public API function that runs analysis

type FileDiscoverer ¶

type FileDiscoverer struct {
	// contains filtered or unexported fields
}

FileDiscoverer handles file discovery and metadata generation

func NewFileDiscoverer ¶

func NewFileDiscoverer(directory string, debug bool) *FileDiscoverer

NewFileDiscoverer creates a new file discoverer

func (*FileDiscoverer) DiscoverFiles ¶

func (fd *FileDiscoverer) DiscoverFiles(ctx context.Context) ([]fileMeta, error)

DiscoverFiles finds all analyzable files in the directory and returns metadata. Uses two phases: fast directory walk (no I/O), then parallel file read/hash.

type ProcessFileResult ¶

type ProcessFileResult struct {
	// RelPath is the relative path of the file processed.
	RelPath string
	// Scans contains scans that need to be executed for this file.
	// If empty, no rules apply to this file.
	Scans []model.ScanData
	// contains filtered or unexported fields
}

type ResultAggregator ¶

type ResultAggregator struct {
	// contains filtered or unexported fields
}

ResultAggregator handles result collection with a single mutex.

func (*ResultAggregator) Finalize ¶

func (w *ResultAggregator) Finalize() error

Finalize writes the complete SARIF report

func (*ResultAggregator) GetSummary ¶

func (w *ResultAggregator) GetSummary() (violationCount, filesAnalyzed int, inputTokens, outputTokens, llmCalls int32)

GetSummary returns current processing summary

func (*ResultAggregator) ProcessResults ¶

func (w *ResultAggregator) ProcessResults(fileResults []model.FileResult, violations []model.Violation,
	inputTokens, outputTokens, llmCalls int32)

ProcessResults processes file results (thread-safe)

type RuleIndex ¶

type RuleIndex struct {
	// contains filtered or unexported fields
}

RuleIndex pre-computes rule-language mappings for efficient lookup

type RuleProcessor ¶

type RuleProcessor struct {
	// contains filtered or unexported fields
}

RuleProcessor handles per-file rule processing logic

func NewRuleProcessor ¶

func NewRuleProcessor(agent *agents.DetectionAgent, opts *model.AnalysisOptions,
	aiContext *model.AiContextProject) (*RuleProcessor, error)

NewRuleProcessor creates a new rule processor

func (*RuleProcessor) BuildScanDataForResult ¶

func (rp *RuleProcessor) BuildScanDataForResult(ctx context.Context, result *ProcessFileResult) error

BuildScanDataForResult reads file content and builds ScanData for rules that apply to this file. This should be called after ProcessFileRulesBatched for results that have applicableRules.

func (*RuleProcessor) GetApplicableRules ¶

func (rp *RuleProcessor) GetApplicableRules(fm fileMeta) []*api.AiPrompt

GetApplicableRules returns rules that apply to the given file (used for fallback on errors)

func (*RuleProcessor) ProcessFileRulesBatched ¶

func (rp *RuleProcessor) ProcessFileRulesBatched(files []fileMeta) ([]ProcessFileResult, error)

ProcessFileRulesBatched processes multiple files at once, returning all applicable rules for scanning.

func (*RuleProcessor) RunScans ¶

func (rp *RuleProcessor) RunScans(ctx context.Context, scanDataList []model.ScanData) (RunScansResult, error)

RunScans runs and returns metrics for the provided list of ScanData. For accurate metrics, the caller should ensure that all ScanData are for the same file.

type RunScanResult ¶

type RunScanResult struct {
	Violations       []model.Violation
	FileInputTokens  int32
	FileOutputTokens int32
}

type RunScansResult ¶

type RunScansResult struct {
	RulesSuccess     []string
	RulesFailed      []string
	Violations       []model.Violation
	FileInputTokens  int32
	FileOutputTokens int32
	FileLLMCalls     int32
}

type Violation ¶

type Violation = model.Violation

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL