detection

package
v1.2.1 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jul 1, 2025 License: MIT Imports: 17 Imported by: 0

Documentation

Overview

Package detection implements the core PI detection engine for identifying personally identifiable information in source code. It supports multiple detection strategies including pattern matching, validation algorithms, and integration with external tools like Gitleaks.

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func CreateAustralianPIRules

func CreateAustralianPIRules() []config.Rule

CreateAustralianPIRules creates Gitleaks rules for Australian PI

func ExtractContext

func ExtractContext(content string, finding Finding, linesBefore, linesAfter int) string

ExtractContext extracts lines of context around a finding

func GetEmbeddedConfig

func GetEmbeddedConfig() string

GetEmbeddedConfig returns the embedded gitleaks configuration

Types

type ASTContext

// ASTContext contains structural information from AST analysis.
//
// It is carried on Finding.ASTContext and LLMValidationRequest.ASTContext.
// Every field is serialized to JSON; the slice fields and the Enclosing*/
// NearbyComments fields are tagged omitempty and are dropped when empty.
type ASTContext struct {
	// File-level information
	Language     string `json:"language"`
	FileType     string `json:"file_type"`  // e.g., "test", "config", "model", "controller"
	RiskZone     string `json:"risk_zone"`  // e.g., "customer_data", "payment_processing"
	// NOTE(review): RiskLevel here is a plain string, not the RiskLevel type,
	// and the example casing below differs from the RiskLevel constants
	// ("CRITICAL", "HIGH", ...) — confirm which values are actually stored.
	RiskLevel    string `json:"risk_level"` // Critical, High, Medium, Low
	IsTestFile   bool   `json:"is_test_file"`
	IsConfigFile bool   `json:"is_config_file"`

	// Code structure
	Classes      []string `json:"classes,omitempty"`      // Class/type names defined in file
	Methods      []string `json:"methods,omitempty"`      // Method/function names
	Imports      []string `json:"imports,omitempty"`      // Import statements
	Dependencies []string `json:"dependencies,omitempty"` // External dependencies

	// Banking domain context
	// Note: the JSON key is "banking_indicators", shorter than the field name.
	BankingDomainIndicators []string `json:"banking_indicators,omitempty"` // e.g., "handles_customer_data", "processes_payments"
	SecurityPatterns        []string `json:"security_patterns,omitempty"`  // e.g., "uses_encryption", "has_authentication"

	// Surrounding code context
	EnclosingClass  string `json:"enclosing_class,omitempty"`  // Class containing the finding
	EnclosingMethod string `json:"enclosing_method,omitempty"` // Method containing the finding
	NearbyComments  string `json:"nearby_comments,omitempty"`  // Relevant comments near the finding
}

ASTContext contains structural information from AST analysis

type Config

// Config holds detection configuration.
//
// Fields carry only yaml struct tags, so the tag values below are the keys
// expected in a YAML configuration document. DefaultConfig returns a
// pre-populated *Config and NewDetectorWithConfig accepts one.
type Config struct {
	// Pattern matching
	EnableRegex    bool     `yaml:"enable_regex"`
	EnableGitleaks bool     `yaml:"enable_gitleaks"`
	CustomPatterns []string `yaml:"custom_patterns"`

	// Validation
	EnableValidation        bool `yaml:"enable_validation"`
	ValidateChecksums       bool `yaml:"validate_checksums"`
	EnableContextValidation bool `yaml:"enable_context_validation"`

	// Context analysis
	TestPathPatterns []string `yaml:"test_path_patterns"`
	MockPathPatterns []string `yaml:"mock_path_patterns"`
	ExcludePaths     []string `yaml:"exclude_paths"`

	// Confidence thresholds
	// NOTE(review): valid range is not visible here (presumably 0.0-1.0 to
	// match Finding.Confidence) — confirm.
	MinConfidenceThreshold float32 `yaml:"min_confidence_threshold"`
	ContextConfidenceBoost float32 `yaml:"context_confidence_boost"`

	// Risk scoring
	// RiskWeights is keyed by PIType, i.e. by the PIType string values in YAML.
	RiskWeights     map[PIType]int `yaml:"risk_weights"`
	ProximityWindow int            `yaml:"proximity_window"`

	// Performance
	// NOTE(review): MaxFileSize unit is not stated (presumably bytes) — confirm.
	MaxFileSize   int64 `yaml:"max_file_size"`
	MaxWorkers    int   `yaml:"max_workers"`
	// Note: the YAML key is "enable_cache", not "enable_caching"; a config
	// file written from the field name will not populate this field.
	EnableCaching bool  `yaml:"enable_cache"`

	// LLM Validation
	EnableLLMValidation bool        `yaml:"enable_llm_validation"`
	LLMProvider         string      `yaml:"llm_provider"`
	LLMEndpoint         string      `yaml:"llm_endpoint"`
	LLMModel            string      `yaml:"llm_model"`
	// NOTE(review): LLMAPIKey is a secret read from the same YAML document as
	// the rest of the config; keep such files out of version control.
	LLMAPIKey           string      `yaml:"llm_api_key"`
	LLMMaxTokens        int         `yaml:"llm_max_tokens"`
	LLMTemperature      float32     `yaml:"llm_temperature"`
	// LLMValidateRisks lists RiskLevel values ("CRITICAL", "HIGH", ...).
	LLMValidateRisks    []RiskLevel `yaml:"llm_validate_risks"`
}

Config holds detection configuration

func DefaultConfig

func DefaultConfig() *Config

DefaultConfig returns the default detection configuration

type ConfigLoader

// ConfigLoader handles loading configuration from various sources.
//
// It has no exported fields; obtain one with NewConfigLoader and call
// LoadGitleaksConfig on it.
type ConfigLoader struct {
	// contains filtered or unexported fields
}

ConfigLoader handles loading configuration from various sources

func NewConfigLoader

func NewConfigLoader() *ConfigLoader

NewConfigLoader creates a new config loader with default search paths

func (*ConfigLoader) LoadGitleaksConfig

func (cl *ConfigLoader) LoadGitleaksConfig(customPath string) (string, error)

LoadGitleaksConfig attempts to load gitleaks configuration from various sources

type Detector

// Detector is the interface for PI detection engines.
//
// The package constructors NewDetector, NewDetectorWithConfig and the
// NewGitleaksDetector* family all return a Detector; *LLMEnhancedDetector
// declares the same two methods and wraps another Detector.
type Detector interface {
	// Detect analyzes content and returns findings
	//
	// content is the raw file bytes and filename identifies the file they
	// came from; ctx is the caller's context.
	Detect(ctx context.Context, content []byte, filename string) ([]Finding, error)

	// Name returns the detector name
	Name() string
}

Detector is the interface for PI detection engines

func NewDetector

func NewDetector() Detector

NewDetector creates a new detector with default configuration

func NewDetectorWithConfig

func NewDetectorWithConfig(config *Config) Detector

NewDetectorWithConfig creates a new detector with custom configuration

func NewGitleaksDetector

func NewGitleaksDetector(configPath string) (Detector, error)

NewGitleaksDetector creates a new Gitleaks-based detector

func NewGitleaksDetectorAuto

func NewGitleaksDetectorAuto() (Detector, error)

NewGitleaksDetectorAuto creates a detector with automatic config resolution

func NewGitleaksDetectorWithDefaults

func NewGitleaksDetectorWithDefaults() (Detector, error)

NewGitleaksDetectorWithDefaults creates a detector with default config + Australian rules

type Finding

// Finding represents a detected PI instance.
//
// It is the element type returned by Detector.Detect and stored in
// ScanResult.Findings, and is serialized to JSON using the tags below.
type Finding struct {
	// Core fields
	Type   PIType `json:"type"`
	Match  string `json:"match"`
	File   string `json:"file"`
	// NOTE(review): whether Line/Column are 0- or 1-based is not visible
	// here — confirm before mapping to editor positions.
	Line   int    `json:"line"`
	Column int    `json:"column"`

	// Context
	Context       string `json:"context"`
	ContextBefore string `json:"context_before"`
	ContextAfter  string `json:"context_after"`

	// Risk assessment
	RiskLevel       RiskLevel `json:"risk_level"`
	// Note: Confidence is float32 while LLMConfidence below is float64;
	// convert explicitly when comparing the two.
	Confidence      float32   `json:"confidence"`
	ContextModifier float32   `json:"context_modifier"`

	// Validation
	Validated       bool   `json:"validated"`
	ValidationError string `json:"validation_error,omitempty"`

	// LLM Validation
	// All four fields are omitempty, so a finding with zero values here
	// (false, "", 0) serializes without any llm_* keys.
	LLMValidated   bool      `json:"llm_validated,omitempty"`
	LLMRisk        RiskLevel `json:"llm_risk,omitempty"`
	LLMExplanation string    `json:"llm_explanation,omitempty"`
	LLMConfidence  float64   `json:"llm_confidence,omitempty"`

	// Metadata
	DetectedAt   time.Time `json:"detected_at"`
	DetectorName string    `json:"detector_name"`

	// AST Context
	// Pointer field: nil is omitted from JSON output.
	ASTContext *ASTContext `json:"ast_context,omitempty"`
}

Finding represents a detected PI instance

type LLMEnhancedConfig

// LLMEnhancedConfig holds configuration for LLM-enhanced detection.
//
// It is passed to NewLLMEnhancedDetector. Fields carry yaml tags only.
type LLMEnhancedConfig struct {
	Enabled            bool        `yaml:"enabled"`
	// ValidateRiskLevels lists RiskLevel values ("CRITICAL", "HIGH", ...).
	ValidateRiskLevels []RiskLevel `yaml:"validate_risk_levels"`
	MaxConcurrency     int         `yaml:"max_concurrency"`
	SkipTestFiles      bool        `yaml:"skip_test_files"`
	// Line counts; these have the same shape as ExtractContext's
	// linesBefore/linesAfter parameters.
	ContextLinesBefore int         `yaml:"context_lines_before"`
	ContextLinesAfter  int         `yaml:"context_lines_after"`
}

LLMEnhancedConfig holds configuration for LLM-enhanced detection

type LLMEnhancedDetector

// LLMEnhancedDetector wraps a regular detector with LLM validation.
//
// Construct it with NewLLMEnhancedDetector. It declares Detect and Name with
// the same signatures as the Detector interface, plus SetProgressCallback.
type LLMEnhancedDetector struct {
	// contains filtered or unexported fields
}

LLMEnhancedDetector wraps a regular detector with LLM validation

func NewLLMEnhancedDetector

func NewLLMEnhancedDetector(baseDetector Detector, validator LLMValidator, config *LLMEnhancedConfig) *LLMEnhancedDetector

NewLLMEnhancedDetector creates a new LLM-enhanced detector

func (*LLMEnhancedDetector) Detect

func (d *LLMEnhancedDetector) Detect(ctx context.Context, content []byte, filename string) ([]Finding, error)

Detect runs the base detector and enhances findings with LLM validation

func (*LLMEnhancedDetector) Name

func (d *LLMEnhancedDetector) Name() string

Name returns the detector name

func (*LLMEnhancedDetector) SetProgressCallback

func (d *LLMEnhancedDetector) SetProgressCallback(callback func(processed, total int, rate float64))

SetProgressCallback sets the progress callback function

type LLMValidationRequest

// LLMValidationRequest contains all information needed for LLM validation.
//
// It is the argument to LLMValidator.ValidateFinding and is passed by value,
// so the embedded Finding is copied on each call.
type LLMValidationRequest struct {
	Finding       Finding     `json:"finding"`
	Context       string      `json:"context"`
	FilePath      string      `json:"file_path"`
	FileType      string      `json:"file_type"`
	IsTestFile    bool        `json:"is_test_file"`
	// Omitted from JSON when empty.
	SurroundingPI []Finding   `json:"surrounding_pi,omitempty"`
	// Omitted from JSON when nil.
	ASTContext    *ASTContext `json:"ast_context,omitempty"`
}

LLMValidationRequest contains all information needed for LLM validation

type LLMValidationResult

// LLMValidationResult contains the LLM's assessment of a finding.
//
// It is returned by LLMValidator.ValidateFinding. Its Risk, Explanation and
// Confidence fields have the same types as Finding's LLMRisk, LLMExplanation
// and LLMConfidence fields.
type LLMValidationResult struct {
	Risk        RiskLevel `json:"risk"`
	Explanation string    `json:"explanation"`
	Confidence  float64   `json:"confidence"`
	Timestamp   time.Time `json:"timestamp"`
}

LLMValidationResult contains the LLM's assessment of a finding

type LLMValidator

// LLMValidator provides context-aware validation of findings.
//
// An implementation is supplied to NewLLMEnhancedDetector.
type LLMValidator interface {
	// ValidateFinding assesses a single request and returns the result, or an
	// error if the assessment could not be produced.
	ValidateFinding(ctx context.Context, req LLMValidationRequest) (*LLMValidationResult, error)
	// HealthCheck returns a non-nil error when the validator is not usable.
	// NOTE(review): what is probed is implementation-defined — confirm.
	HealthCheck(ctx context.Context) error
}

LLMValidator provides context-aware validation of findings

type PIType

// PIType represents the type of personally identifiable information.
// It is a string type, so it serializes as its string value and is usable as
// a map key (see Config.RiskWeights and ScanSummary.ByType).
type PIType string

PIType represents the type of personally identifiable information

// PIType values defined by the package. The quoted string is what appears in
// serialized output (for example Finding's "type" JSON field) and what must be
// used as the key in map[PIType] configuration such as Config.RiskWeights.
const (
	PITypeTFN           PIType = "TFN"
	PITypeMedicare      PIType = "MEDICARE"
	PITypeABN           PIType = "ABN"
	PITypeACN           PIType = "ACN"
	PITypeARBN          PIType = "ARBN"
	PITypeBSB           PIType = "BSB"
	PITypeBankAccount   PIType = "BANK_ACCOUNT"
	PITypeEmail         PIType = "EMAIL"
	PITypePhone         PIType = "PHONE"
	PITypeName          PIType = "NAME"
	PITypeAddress       PIType = "ADDRESS"
	PITypeCreditCard    PIType = "CREDIT_CARD"
	PITypeDriverLicense PIType = "DRIVER_LICENSE"
	PITypePassport      PIType = "PASSPORT"
	// NOTE(review): PITypeAccount ("ACCOUNT") is distinct from
	// PITypeBankAccount ("BANK_ACCOUNT") — confirm the intended difference.
	PITypeAccount       PIType = "ACCOUNT"
	// Note: the identifier is PITypeIP but the value is "IP_ADDRESS".
	PITypeIP            PIType = "IP_ADDRESS"
	PITypeSWIFT         PIType = "SWIFT"
)

type PatternMatch

// PatternMatch represents a regex pattern match.
//
// It is the element type returned by PatternMatcher.Match. The struct has no
// json/yaml tags.
type PatternMatch struct {
	Value            string
	// NOTE(review): presumably byte offsets into the content passed to
	// Match, with EndIndex exclusive as in the regexp package — confirm.
	StartIndex       int
	EndIndex         int
	// NOTE(review): presumably named capture groups keyed by group name — confirm.
	Groups           map[string]string
	ValidationPassed bool // Whether pattern-specific validation passed
}

PatternMatch represents a regex pattern match

type PatternMatcher

// PatternMatcher defines the interface for pattern-based detection.
//
// Unlike Detector.Detect, Match takes no context or filename and returns no
// error.
type PatternMatcher interface {
	// Match finds all pattern matches in content
	Match(content []byte) []PatternMatch

	// Type returns the PI type this matcher detects
	Type() PIType
}

PatternMatcher defines the interface for pattern-based detection

type RiskLevel

// RiskLevel represents the severity of a finding.
// It is a string type; use its Compare or ToInt methods to order values
// rather than comparing the strings directly.
type RiskLevel string

RiskLevel represents the severity of a finding

// RiskLevel values defined by the package.
//
// The string values do not sort by severity: lexicographically
// "CRITICAL" < "HIGH" < "LOW" < "MEDIUM". Order them with RiskLevel.Compare
// or RiskLevel.ToInt instead of the < operator.
const (
	RiskLevelCritical RiskLevel = "CRITICAL"
	RiskLevelHigh     RiskLevel = "HIGH"
	RiskLevelMedium   RiskLevel = "MEDIUM"
	RiskLevelLow      RiskLevel = "LOW"
)

func (RiskLevel) Compare

func (r RiskLevel) Compare(other RiskLevel) int

Compare returns -1 if r < other, 0 if r == other, 1 if r > other

func (RiskLevel) ToInt

func (r RiskLevel) ToInt() int

ToInt converts RiskLevel to integer for comparison

type ScanError

// ScanError represents an error during scanning.
//
// It is a plain data record stored in ScanResult.Errors. Because it has a
// field named Error, it cannot also declare an Error() method (Go requires
// field and method names on a struct type to be distinct), so ScanError does
// not implement the built-in error interface.
type ScanError struct {
	File  string    `json:"file"`
	// Error holds the error as a string.
	Error string    `json:"error"`
	Time  time.Time `json:"time"`
}

ScanError represents an error during scanning

type ScanResult

// ScanResult represents the complete results of a scan.
type ScanResult struct {
	Repository string      `json:"repository"`
	StartTime  time.Time   `json:"start_time"`
	EndTime    time.Time   `json:"end_time"`
	// Findings has no omitempty: a nil slice is encoded as JSON null, an
	// empty non-nil slice as [].
	Findings   []Finding   `json:"findings"`
	Summary    ScanSummary `json:"summary"`
	// Errors is omitted from JSON when empty.
	Errors     []ScanError `json:"errors,omitempty"`
}

ScanResult represents the complete results of a scan

type ScanSummary

// ScanSummary provides aggregate statistics.
//
// It is embedded by value in ScanResult.Summary.
type ScanSummary struct {
	TotalFiles    int               `json:"total_files"`
	ScannedFiles  int               `json:"scanned_files"`
	SkippedFiles  int               `json:"skipped_files"`
	TotalFindings int               `json:"total_findings"`
	// Map keys are encoded as the RiskLevel / PIType string values.
	ByRiskLevel   map[RiskLevel]int `json:"by_risk_level"`
	ByType        map[PIType]int    `json:"by_type"`
	// time.Duration is an int64 and encoding/json writes it as an integer
	// number of nanoseconds, not as a string such as "1.5s".
	Duration      time.Duration     `json:"duration"`
}

ScanSummary provides aggregate statistics

type ValidationProgress

// ValidationProgress tracks LLM validation progress.
//
// It has no exported fields, and this page lists no exported constructor or
// methods for it.
type ValidationProgress struct {
	// contains filtered or unexported fields
}

ValidationProgress tracks LLM validation progress

type Validator

// Validator validates specific PI types.
//
// Each implementation handles a single PIType, reported by Type.
type Validator interface {
	// Validate checks if the value is valid for this PI type
	//
	// NOTE(review): the split between (false, nil) and a non-nil error is not
	// specified here — confirm against the implementations.
	Validate(value string) (bool, error)

	// Type returns the PI type this validator handles
	Type() PIType

	// Normalize returns a normalized version of the value
	Normalize(value string) string
}

Validator validates specific PI types

Directories

Path Synopsis
proximity — Package proximity implements proximity-based PI detection enhancement.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL