detection

package

Version: v1.2.1 (latest) · Published: Jul 1, 2025 · License: MIT · Imports: 17 · Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/MacAttak/pi-scanner

Links

Open Source Insights

Documentation ¶

Overview ¶

Package detection implements the core PI detection engine for identifying personally identifiable information in source code. It supports multiple detection strategies including pattern matching, validation algorithms, and integration with external tools like Gitleaks.

Index ¶

func CreateAustralianPIRules() []config.Rule
func ExtractContext(content string, finding Finding, linesBefore, linesAfter int) string
func GetEmbeddedConfig() string
type ASTContext
type Config
- func DefaultConfig() *Config
type ConfigLoader
- func NewConfigLoader() *ConfigLoader
- func (cl *ConfigLoader) LoadGitleaksConfig(customPath string) (string, error)
type Detector
- func NewDetector() Detector
- func NewDetectorWithConfig(config *Config) Detector
- func NewGitleaksDetector(configPath string) (Detector, error)
- func NewGitleaksDetectorAuto() (Detector, error)
- func NewGitleaksDetectorWithDefaults() (Detector, error)
type Finding
type LLMEnhancedConfig
type LLMEnhancedDetector
- func NewLLMEnhancedDetector(baseDetector Detector, validator LLMValidator, config *LLMEnhancedConfig) *LLMEnhancedDetector
- func (d *LLMEnhancedDetector) Detect(ctx context.Context, content []byte, filename string) ([]Finding, error)
- func (d *LLMEnhancedDetector) Name() string
- func (d *LLMEnhancedDetector) SetProgressCallback(callback func(processed, total int, rate float64))
type LLMValidationRequest
type LLMValidationResult
type LLMValidator
type PIType
type PatternMatch
type PatternMatcher
type RiskLevel
- func (r RiskLevel) Compare(other RiskLevel) int
- func (r RiskLevel) ToInt() int
type ScanError
type ScanResult
type ScanSummary
type ValidationProgress
type Validator

Constants ¶

This section is empty.

Variables ¶

This section is empty.

Functions ¶

func CreateAustralianPIRules ¶

func CreateAustralianPIRules() []config.Rule

CreateAustralianPIRules creates Gitleaks rules for Australian PI

func ExtractContext ¶

func ExtractContext(content string, finding Finding, linesBefore, linesAfter int) string

ExtractContext extracts lines of context around a finding

func GetEmbeddedConfig ¶

func GetEmbeddedConfig() string

GetEmbeddedConfig returns the embedded gitleaks configuration

Types ¶

type ASTContext ¶

// ASTContext contains structural information from AST analysis.
//
// All fields carry JSON tags; those tagged omitempty are left out of the
// encoded output when they hold their zero value (empty string / empty slice).
//
// NOTE(review): RiskLevel here is a plain string whose comment lists mixed-case
// values ("Critical", "High", ...), whereas the package's RiskLevel type uses
// upper-case constants ("CRITICAL", "HIGH", ...). Confirm which casing
// producers of this struct actually emit before comparing the two.
type ASTContext struct {
	// File-level information
	Language     string `json:"language"`
	FileType     string `json:"file_type"`  // e.g., "test", "config", "model", "controller"
	RiskZone     string `json:"risk_zone"`  // e.g., "customer_data", "payment_processing"
	RiskLevel    string `json:"risk_level"` // Critical, High, Medium, Low
	IsTestFile   bool   `json:"is_test_file"`
	IsConfigFile bool   `json:"is_config_file"`

	// Code structure
	Classes      []string `json:"classes,omitempty"`      // Class/type names defined in file
	Methods      []string `json:"methods,omitempty"`      // Method/function names
	Imports      []string `json:"imports,omitempty"`      // Import statements
	Dependencies []string `json:"dependencies,omitempty"` // External dependencies

	// Banking domain context
	BankingDomainIndicators []string `json:"banking_indicators,omitempty"` // e.g., "handles_customer_data", "processes_payments"
	SecurityPatterns        []string `json:"security_patterns,omitempty"`  // e.g., "uses_encryption", "has_authentication"

	// Surrounding code context
	EnclosingClass  string `json:"enclosing_class,omitempty"`  // Class containing the finding
	EnclosingMethod string `json:"enclosing_method,omitempty"` // Method containing the finding
	NearbyComments  string `json:"nearby_comments,omitempty"`  // Relevant comments near the finding
}

ASTContext contains structural information from AST analysis

type Config ¶

// Config holds detection configuration.
//
// Each field maps to the YAML key named in its struct tag. DefaultConfig
// returns a *Config; the default values themselves are not visible here.
//
// NOTE(review): EnableCaching is keyed as "enable_cache" (not
// "enable_caching"), unlike the other Enable* fields whose keys mirror the
// field name — easy to mistype in a config file.
//
// NOTE(review): LLMAPIKey is a credential read from the "llm_api_key" key;
// presumably it should not be committed or logged — confirm how callers
// supply and protect it.
type Config struct {
	// Pattern matching
	EnableRegex    bool     `yaml:"enable_regex"`
	EnableGitleaks bool     `yaml:"enable_gitleaks"`
	CustomPatterns []string `yaml:"custom_patterns"`

	// Validation
	EnableValidation        bool `yaml:"enable_validation"`
	ValidateChecksums       bool `yaml:"validate_checksums"`
	EnableContextValidation bool `yaml:"enable_context_validation"`

	// Context analysis
	TestPathPatterns []string `yaml:"test_path_patterns"`
	MockPathPatterns []string `yaml:"mock_path_patterns"`
	ExcludePaths     []string `yaml:"exclude_paths"`

	// Confidence thresholds
	MinConfidenceThreshold float32 `yaml:"min_confidence_threshold"`
	ContextConfidenceBoost float32 `yaml:"context_confidence_boost"`

	// Risk scoring
	RiskWeights     map[PIType]int `yaml:"risk_weights"`
	ProximityWindow int            `yaml:"proximity_window"`

	// Performance
	MaxFileSize   int64 `yaml:"max_file_size"`
	MaxWorkers    int   `yaml:"max_workers"`
	EnableCaching bool  `yaml:"enable_cache"`

	// LLM Validation
	EnableLLMValidation bool        `yaml:"enable_llm_validation"`
	LLMProvider         string      `yaml:"llm_provider"`
	LLMEndpoint         string      `yaml:"llm_endpoint"`
	LLMModel            string      `yaml:"llm_model"`
	LLMAPIKey           string      `yaml:"llm_api_key"`
	LLMMaxTokens        int         `yaml:"llm_max_tokens"`
	LLMTemperature      float32     `yaml:"llm_temperature"`
	LLMValidateRisks    []RiskLevel `yaml:"llm_validate_risks"`
}

Config holds detection configuration

func DefaultConfig ¶

func DefaultConfig() *Config

DefaultConfig returns the default detection configuration

type ConfigLoader ¶

// ConfigLoader handles loading configuration from various sources.
// All of its fields are unexported; obtain one via NewConfigLoader.
type ConfigLoader struct {
	// contains filtered or unexported fields
}

ConfigLoader handles loading configuration from various sources

func NewConfigLoader ¶

func NewConfigLoader() *ConfigLoader

NewConfigLoader creates a new config loader with default search paths

func (*ConfigLoader) LoadGitleaksConfig ¶

func (cl *ConfigLoader) LoadGitleaksConfig(customPath string) (string, error)

LoadGitleaksConfig attempts to load gitleaks configuration from various sources

type Detector ¶

// Detector is the interface for PI detection engines.
//
// The package's New*Detector constructors return this interface, and
// *LLMEnhancedDetector declares both of its methods.
type Detector interface {
	// Detect analyzes content and returns findings.
	// NOTE(review): how filename is used (labelling findings vs. path-based
	// context rules) and whether ctx cancellation is honoured depend on the
	// implementation — confirm per detector.
	Detect(ctx context.Context, content []byte, filename string) ([]Finding, error)

	// Name returns the detector name.
	Name() string
}

Detector is the interface for PI detection engines

func NewDetector ¶

func NewDetector() Detector

NewDetector creates a new detector with default configuration

func NewDetectorWithConfig ¶

func NewDetectorWithConfig(config *Config) Detector

NewDetectorWithConfig creates a new detector with custom configuration

func NewGitleaksDetector ¶

func NewGitleaksDetector(configPath string) (Detector, error)

NewGitleaksDetector creates a new Gitleaks-based detector

func NewGitleaksDetectorAuto ¶

func NewGitleaksDetectorAuto() (Detector, error)

NewGitleaksDetectorAuto creates a detector with automatic config resolution

func NewGitleaksDetectorWithDefaults ¶

func NewGitleaksDetectorWithDefaults() (Detector, error)

NewGitleaksDetectorWithDefaults creates a detector that uses the default config plus the Australian rules.

type Finding ¶

// Finding represents a detected PI instance.
//
// The struct is JSON-serializable. Fields tagged omitempty are dropped from
// the output at their zero value, so an absent "llm_validated" key and
// LLMValidated == false are indistinguishable after encoding.
//
// NOTE(review): Match, Context, ContextBefore and ContextAfter are emitted
// verbatim under their JSON keys, so serialized findings presumably contain
// the raw matched value — confirm whether redaction happens before reports
// are written.
//
// NOTE(review): Confidence is float32 while LLMConfidence is float64; convert
// explicitly when comparing or combining them.
type Finding struct {
	// Core fields
	Type   PIType `json:"type"`
	Match  string `json:"match"`
	File   string `json:"file"`
	Line   int    `json:"line"`
	Column int    `json:"column"`

	// Context
	Context       string `json:"context"`
	ContextBefore string `json:"context_before"`
	ContextAfter  string `json:"context_after"`

	// Risk assessment
	RiskLevel       RiskLevel `json:"risk_level"`
	Confidence      float32   `json:"confidence"`
	ContextModifier float32   `json:"context_modifier"`

	// Validation
	Validated       bool   `json:"validated"`
	ValidationError string `json:"validation_error,omitempty"`

	// LLM Validation
	LLMValidated   bool      `json:"llm_validated,omitempty"`
	LLMRisk        RiskLevel `json:"llm_risk,omitempty"`
	LLMExplanation string    `json:"llm_explanation,omitempty"`
	LLMConfidence  float64   `json:"llm_confidence,omitempty"`

	// Metadata
	DetectedAt   time.Time `json:"detected_at"`
	DetectorName string    `json:"detector_name"`

	// AST Context
	ASTContext *ASTContext `json:"ast_context,omitempty"`
}

Finding represents a detected PI instance

type LLMEnhancedConfig ¶

// LLMEnhancedConfig holds configuration for LLM-enhanced detection.
//
// Each field maps to the YAML key in its struct tag. It is passed by pointer
// to NewLLMEnhancedDetector.
// NOTE(review): behaviour for a nil config or MaxConcurrency <= 0 is not
// visible here — confirm before relying on defaults.
type LLMEnhancedConfig struct {
	Enabled            bool        `yaml:"enabled"`
	ValidateRiskLevels []RiskLevel `yaml:"validate_risk_levels"`
	MaxConcurrency     int         `yaml:"max_concurrency"`
	SkipTestFiles      bool        `yaml:"skip_test_files"`
	ContextLinesBefore int         `yaml:"context_lines_before"`
	ContextLinesAfter  int         `yaml:"context_lines_after"`
}

LLMEnhancedConfig holds configuration for LLM-enhanced detection

type LLMEnhancedDetector ¶

// LLMEnhancedDetector wraps a regular detector with LLM validation.
// All of its fields are unexported; construct it with NewLLMEnhancedDetector.
type LLMEnhancedDetector struct {
	// contains filtered or unexported fields
}

LLMEnhancedDetector wraps a regular detector with LLM validation

func NewLLMEnhancedDetector ¶

func NewLLMEnhancedDetector(baseDetector Detector, validator LLMValidator, config *LLMEnhancedConfig) *LLMEnhancedDetector

NewLLMEnhancedDetector creates a new LLM-enhanced detector

func (*LLMEnhancedDetector) Detect ¶

func (d *LLMEnhancedDetector) Detect(ctx context.Context, content []byte, filename string) ([]Finding, error)

Detect runs the base detector and enhances findings with LLM validation

func (*LLMEnhancedDetector) Name ¶

func (d *LLMEnhancedDetector) Name() string

Name returns the detector name

func (*LLMEnhancedDetector) SetProgressCallback ¶

func (d *LLMEnhancedDetector) SetProgressCallback(callback func(processed, total int, rate float64))

SetProgressCallback sets the progress callback function

type LLMValidationRequest ¶

// LLMValidationRequest contains all information needed for LLM validation.
//
// Finding is held by value, so the request carries its own copy of the
// finding. SurroundingPI and ASTContext are omitted from JSON when empty/nil.
//
// NOTE(review): FileType and IsTestFile also exist on ASTContext; which copy
// is authoritative when both are set is not visible here — confirm.
type LLMValidationRequest struct {
	Finding       Finding     `json:"finding"`
	Context       string      `json:"context"`
	FilePath      string      `json:"file_path"`
	FileType      string      `json:"file_type"`
	IsTestFile    bool        `json:"is_test_file"`
	SurroundingPI []Finding   `json:"surrounding_pi,omitempty"`
	ASTContext    *ASTContext `json:"ast_context,omitempty"`
}

LLMValidationRequest contains all information needed for LLM validation

type LLMValidationResult ¶

// LLMValidationResult contains the LLM's assessment of a finding.
//
// Its Risk, Explanation and Confidence fields have the same types as
// Finding's LLMRisk, LLMExplanation and LLMConfidence.
// NOTE(review): the valid range of Confidence (presumably 0–1) is not stated
// here — confirm.
type LLMValidationResult struct {
	Risk        RiskLevel `json:"risk"`
	Explanation string    `json:"explanation"`
	Confidence  float64   `json:"confidence"`
	Timestamp   time.Time `json:"timestamp"`
}

LLMValidationResult contains the LLM's assessment of a finding

type LLMValidator ¶

// LLMValidator provides context-aware validation of findings.
type LLMValidator interface {
	// ValidateFinding takes a validation request and returns the LLM's
	// assessment, or an error.
	// NOTE(review): whether a nil result can accompany a nil error is not
	// visible here — callers should confirm before dereferencing.
	ValidateFinding(ctx context.Context, req LLMValidationRequest) (*LLMValidationResult, error)
	// HealthCheck reports an error; presumably non-nil when the LLM backend
	// is unavailable — confirm against the implementation.
	HealthCheck(ctx context.Context) error
}

LLMValidator provides context-aware validation of findings

type PIType ¶

// PIType represents the type of personally identifiable information.
// Its underlying type is string; the defined values are the PIType* constants.
type PIType string

PIType represents the type of personally identifiable information

// Defined PIType values. The string on the right is what appears wherever a
// PIType is serialized (e.g. Finding's "type" JSON key, Config's risk_weights).
//
// Note that the constant name and its value do not always match:
// PITypeIP has the value "IP_ADDRESS". PITypeAccount ("ACCOUNT") and
// PITypeBankAccount ("BANK_ACCOUNT") are distinct values.
const (
	PITypeTFN           PIType = "TFN"
	PITypeMedicare      PIType = "MEDICARE"
	PITypeABN           PIType = "ABN"
	PITypeACN           PIType = "ACN"
	PITypeARBN          PIType = "ARBN"
	PITypeBSB           PIType = "BSB"
	PITypeBankAccount   PIType = "BANK_ACCOUNT"
	PITypeEmail         PIType = "EMAIL"
	PITypePhone         PIType = "PHONE"
	PITypeName          PIType = "NAME"
	PITypeAddress       PIType = "ADDRESS"
	PITypeCreditCard    PIType = "CREDIT_CARD"
	PITypeDriverLicense PIType = "DRIVER_LICENSE"
	PITypePassport      PIType = "PASSPORT"
	PITypeAccount       PIType = "ACCOUNT"
	PITypeIP            PIType = "IP_ADDRESS"
	PITypeSWIFT         PIType = "SWIFT"
)

type PatternMatch ¶

// PatternMatch represents a regex pattern match.
//
// The struct has no serialization tags.
// NOTE(review): StartIndex/EndIndex are presumably byte offsets into the
// content passed to PatternMatcher.Match, and whether EndIndex is exclusive
// is not stated here — confirm before slicing with them.
// NOTE(review): Groups presumably maps named capture groups to their matched
// text — confirm.
type PatternMatch struct {
	Value            string
	StartIndex       int
	EndIndex         int
	Groups           map[string]string
	ValidationPassed bool // Whether pattern-specific validation passed
}

PatternMatch represents a regex pattern match

type PatternMatcher ¶

// PatternMatcher defines the interface for pattern-based detection.
// Each matcher reports a single PIType via Type.
type PatternMatcher interface {
	// Match finds all pattern matches in content.
	Match(content []byte) []PatternMatch

	// Type returns the PI type this matcher detects.
	Type() PIType
}

PatternMatcher defines the interface for pattern-based detection

type RiskLevel ¶

// RiskLevel represents the severity of a finding.
// Its underlying type is string; the defined values are the RiskLevel*
// constants, and the type provides Compare and ToInt for comparisons.
type RiskLevel string

RiskLevel represents the severity of a finding

// Defined RiskLevel values.
//
// Because RiskLevel is a string type, the built-in <, > operators compare
// these lexically ("CRITICAL" < "HIGH" < "LOW" < "MEDIUM"), which is not a
// severity order. Use RiskLevel.Compare or RiskLevel.ToInt to compare levels;
// the exact integer mapping is not visible here.
const (
	RiskLevelCritical RiskLevel = "CRITICAL"
	RiskLevelHigh     RiskLevel = "HIGH"
	RiskLevelMedium   RiskLevel = "MEDIUM"
	RiskLevelLow      RiskLevel = "LOW"
)

func (RiskLevel) Compare ¶

func (r RiskLevel) Compare(other RiskLevel) int

Compare returns -1 if r < other, 0 if r == other, and 1 if r > other.

func (RiskLevel) ToInt ¶

func (r RiskLevel) ToInt() int

ToInt converts RiskLevel to integer for comparison

type ScanError ¶

// ScanError represents an error during scanning.
//
// Error holds the message as a string. Because the struct has a field named
// Error, Go does not allow it to also declare an Error() method, so ScanError
// does not satisfy the built-in error interface and cannot be returned or
// wrapped as an error directly.
type ScanError struct {
	File  string    `json:"file"`
	Error string    `json:"error"`
	Time  time.Time `json:"time"`
}

ScanError represents an error during scanning

type ScanResult ¶

// ScanResult represents the complete results of a scan.
//
// JSON notes: Findings has no omitempty, so a nil slice is encoded as
// "findings": null rather than []; Errors is omitted entirely when empty.
// NOTE(review): whether Summary.Duration is always EndTime minus StartTime is
// not visible here — confirm.
type ScanResult struct {
	Repository string      `json:"repository"`
	StartTime  time.Time   `json:"start_time"`
	EndTime    time.Time   `json:"end_time"`
	Findings   []Finding   `json:"findings"`
	Summary    ScanSummary `json:"summary"`
	Errors     []ScanError `json:"errors,omitempty"`
}

ScanResult represents the complete results of a scan

type ScanSummary ¶

// ScanSummary provides aggregate statistics.
//
// JSON notes: Duration is a time.Duration, which encoding/json writes as an
// integer number of nanoseconds (not a string such as "1.5s"). ByRiskLevel
// and ByType are keyed by the string values of RiskLevel and PIType; a nil
// map is encoded as null.
// NOTE(review): presumably TotalFiles == ScannedFiles + SkippedFiles —
// confirm against the code that fills this in.
type ScanSummary struct {
	TotalFiles    int               `json:"total_files"`
	ScannedFiles  int               `json:"scanned_files"`
	SkippedFiles  int               `json:"skipped_files"`
	TotalFindings int               `json:"total_findings"`
	ByRiskLevel   map[RiskLevel]int `json:"by_risk_level"`
	ByType        map[PIType]int    `json:"by_type"`
	Duration      time.Duration     `json:"duration"`
}

ScanSummary provides aggregate statistics

type ValidationProgress ¶

// ValidationProgress tracks LLM validation progress.
// All of its fields are unexported and no exported constructor or methods are
// listed for it in this package's index.
type ValidationProgress struct {
	// contains filtered or unexported fields
}

ValidationProgress tracks LLM validation progress

type Validator ¶

// Validator validates specific PI types.
// Each validator reports the single PIType it handles via Type.
type Validator interface {
	// Validate checks if the value is valid for this PI type.
	// NOTE(review): the difference between returning (false, nil) and a
	// non-nil error is not specified here — confirm per implementation.
	Validate(value string) (bool, error)

	// Type returns the PI type this validator handles.
	Type() PIType

	// Normalize returns a normalized version of the value.
	// NOTE(review): whether Validate expects raw or already-normalized input
	// is not visible here — confirm.
	Normalize(value string) string
}

Validator validates specific PI types

Source Files ¶

View all Source files

Directories ¶

Path	Synopsis
proximity	Package proximity implements proximity-based PI detection enhancement.

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL