diagnostic

package
v0.0.0-...-6196728
Published: Nov 6, 2025 License: AGPL-3.0 Imports: 14 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func BuildAnalysisPrompt

func BuildAnalysisPrompt(sourceCode string) string

BuildAnalysisPrompt constructs the prompt for LLM pattern discovery and test generation. Based on diagnostic-tech-proposal.md Section 3.3 (LLM Prompt Design).
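
A minimal usage sketch, assuming fn is a *FunctionMetadata obtained from ExtractAllFunctions:

prompt := BuildAnalysisPrompt(fn.SourceCode)
fmt.Printf("Prompt for %s: %d characters\n", fn.FQN, len(prompt))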

func FlowsMatch

func FlowsMatch(f1, f2 NormalizedTaintFlow, config MatchConfig) bool

FlowsMatch reports whether two normalized flows match, using fuzzy matching as configured by config.
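
A minimal sketch, assuming f1 and f2 are NormalizedTaintFlow values produced by the normalization helpers:

config := DefaultMatchConfig()
if FlowsMatch(f1, f2, config) {
    fmt.Println("Flows match within the configured tolerance")
}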

func GenerateConsoleReport

func GenerateConsoleReport(metrics *OverallMetrics, outputDir string) error

GenerateConsoleReport prints a human-readable report to stdout.
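
A minimal sketch, assuming metrics comes from CalculateOverallMetrics; the output directory name is illustrative:

if err := GenerateConsoleReport(metrics, "./diagnostic-output"); err != nil {
    log.Printf("console report failed: %v", err)
}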

func GenerateJSONReport

func GenerateJSONReport(
	metrics *OverallMetrics,
	comparisons []*DualLevelComparison,
	outputPath string,
) error

GenerateJSONReport writes a machine-readable JSON report to outputPath.
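
A minimal sketch, assuming metrics and comparisons come from earlier steps; the output path is illustrative:

if err := GenerateJSONReport(metrics, comparisons, "diagnostic-report.json"); err != nil {
    log.Printf("JSON report failed: %v", err)
}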

Types

type AnalysisMetadata

type AnalysisMetadata struct {
	TotalSources    int      `json:"total_sources"`
	TotalSinks      int      `json:"total_sinks"`
	TotalSanitizers int      `json:"total_sanitizers"`
	TotalFlows      int      `json:"total_flows"`
	DangerousFlows  int      `json:"dangerous_flows"`
	SafeFlows       int      `json:"safe_flows"`
	Confidence      float64  `json:"confidence"`
	Limitations     []string `json:"limitations"`
	ProcessingTime  string   `json:"processing_time,omitempty"`
	ModelUsed       string   `json:"model_used,omitempty"`
}

AnalysisMetadata contains metadata about the LLM analysis.

type DataflowTestCase

type DataflowTestCase struct {
	// TestID for reference
	TestID int `json:"test_id"`

	// Description of what this test validates
	Description string `json:"description"`

	// Source information
	Source TestCaseSource `json:"source"`

	// Sink information
	Sink TestCaseSink `json:"sink"`

	// Flow path (sequence of variables/operations)
	FlowPath []FlowStep `json:"flow_path"`

	// Sanitizers in the path (if any)
	SanitizersInPath []string `json:"sanitizers_in_path"`

	// Expected detection result
	// true: Our tool SHOULD detect this flow
	// false: Our tool should NOT detect (e.g., sanitized)
	ExpectedDetection bool `json:"expected_detection"`

	// Vulnerability type (if ExpectedDetection == true)
	VulnerabilityType string `json:"vulnerability_type"`

	// Confidence score (0.0-1.0)
	Confidence float64 `json:"confidence"`

	// Reasoning for this test case
	Reasoning string `json:"reasoning"`

	// Failure category (if tool might miss this)
	// Categories: control_flow_branch, field_sensitivity, sanitizer_missed,
	//            container_operation, string_formatting, method_call_propagation,
	//            assignment_chain, return_flow, parameter_flow, complex_expression,
	//            context_required, none
	FailureCategory string `json:"failure_category,omitempty"`
}

DataflowTestCase is a test case generated by the LLM. The tool's output is validated against these test cases.
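
A sketch of inspecting generated test cases, assuming result is an *LLMAnalysisResult from AnalyzeFunction:

for _, tc := range result.DataflowTestCases {
    fmt.Printf("#%d %s -> %s (expect detection: %v)\n",
        tc.TestID, tc.Source.Pattern, tc.Sink.Pattern, tc.ExpectedDetection)
}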

type DiscoveredPatterns

type DiscoveredPatterns struct {
	Sources     []PatternLocation     `json:"sources"`
	Sinks       []PatternLocation     `json:"sinks"`
	Sanitizers  []PatternLocation     `json:"sanitizers"`
	Propagators []PropagatorOperation `json:"propagators"`
}

DiscoveredPatterns contains all patterns discovered by the LLM in the function.
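
A sketch of summarizing discovered patterns, assuming patterns is the DiscoveredPatterns value from an LLMAnalysisResult:

fmt.Printf("%d sources, %d sinks, %d sanitizers, %d propagators\n",
    len(patterns.Sources), len(patterns.Sinks),
    len(patterns.Sanitizers), len(patterns.Propagators))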

type DualLevelComparison

type DualLevelComparison struct {
	FunctionFQN string

	// Level 1: Binary classification
	BinaryToolResult bool // Tool says: has flow
	BinaryLLMResult  bool // LLM says: has flow
	BinaryAgreement  bool // Do they agree?

	// Level 2: Detailed flow comparison (only if both found flows)
	DetailedComparison *FlowComparisonResult // nil if N/A

	// Metrics
	Precision float64
	Recall    float64
	F1Score   float64

	// Categorization (if disagreement)
	FailureCategory string // "control_flow", "sanitizer", etc.
	FailureReason   string // From LLM reasoning
}

DualLevelComparison represents comparison results at both binary and detailed levels.

func CompareFunctionResults

func CompareFunctionResults(
	fn *FunctionMetadata,
	toolResult *FunctionTaintResult,
	llmResult *LLMAnalysisResult,
) *DualLevelComparison

CompareFunctionResults performs dual-level comparison between tool and LLM results.

Performance: ~1ms per function

Example:

comparison := CompareFunctionResults(fn, toolResult, llmResult)
if comparison.BinaryAgreement {
    fmt.Println("✅ Agreement on binary level")
}
if comparison.DetailedComparison != nil {
    fmt.Printf("Flow precision: %.2f%%\n", comparison.Precision*100)
}

type FailureExample

type FailureExample struct {
	Type         string // "FALSE_POSITIVE", "FALSE_NEGATIVE"
	FunctionFQN  string
	FunctionFile string
	FunctionLine int
	Category     string               // "control_flow", "sanitizer", etc.
	Reason       string               // From LLM
	Flow         *NormalizedTaintFlow // Flow details (if applicable)
}

FailureExample represents a specific failure case.

func ExtractTopFailures

func ExtractTopFailures(
	comparisons []*DualLevelComparison,
	functionsMap map[string]*FunctionMetadata,
	maxPerType int,
) []FailureExample

ExtractTopFailures extracts the most important failure examples. Returns up to maxPerType failures of each type (FP/FN).
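
A minimal sketch, assuming comparisons and functionsMap were built earlier; the limit of 5 per type is arbitrary:

failures := ExtractTopFailures(comparisons, functionsMap, 5)
for _, f := range failures {
    fmt.Printf("[%s] %s (%s): %s\n", f.Type, f.FunctionFQN, f.Category, f.Reason)
}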

type FlowComparisonResult

type FlowComparisonResult struct {
	ToolFlows []NormalizedTaintFlow
	LLMFlows  []NormalizedTaintFlow

	Matches       []FlowMatch           // TP: Both found
	UnmatchedTool []NormalizedTaintFlow // FP: Tool only
	UnmatchedLLM  []NormalizedTaintFlow // FN: LLM only

	FlowPrecision float64 // Matches / ToolFlows
	FlowRecall    float64 // Matches / LLMFlows
	FlowF1Score   float64 // 2PR/(P+R)
}

FlowComparisonResult contains detailed flow-by-flow comparison.

func CompareNormalizedFlows

func CompareNormalizedFlows(
	toolFlows, llmFlows []NormalizedTaintFlow,
	config MatchConfig,
) *FlowComparisonResult

CompareNormalizedFlows performs detailed flow-by-flow comparison with fuzzy matching.
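
A minimal sketch, assuming toolFlows and llmFlows were produced by NormalizeToolResult and NormalizeLLMResult:

result := CompareNormalizedFlows(toolFlows, llmFlows, DefaultMatchConfig())
fmt.Printf("Matched %d flows, %d tool-only (FP), %d LLM-only (FN)\n",
    len(result.Matches), len(result.UnmatchedTool), len(result.UnmatchedLLM))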

type FlowMatch

type FlowMatch struct {
	ToolFlow  NormalizedTaintFlow
	LLMFlow   NormalizedTaintFlow
	ToolIndex int
	LLMIndex  int
}

FlowMatch represents a matched flow between tool and LLM.

type FlowStep

type FlowStep struct {
	Line      int    `json:"line"`
	Variable  string `json:"variable"`
	Operation string `json:"operation"` // "source", "assignment", "call", "sanitizer", "sink"
}

FlowStep describes one step in a dataflow path.

type FunctionMetadata

type FunctionMetadata struct {
	// FilePath is the relative path to the source file
	// Example: "myapp/views.py"
	FilePath string

	// FunctionName is the simple function name
	// Example: "process_input"
	FunctionName string

	// FQN is the fully qualified name (module.Class.function)
	// Example: "myapp.views.process_input" or "myapp.models.User.save"
	FQN string

	// StartLine is the first line of the function definition (1-indexed)
	// Includes decorators if present
	StartLine int

	// EndLine is the last line of the function body (1-indexed)
	EndLine int

	// SourceCode is the complete function source code
	// Includes decorators, signature, and body
	SourceCode string

	// LOC is lines of code (EndLine - StartLine + 1)
	LOC int

	// HasDecorators indicates if function has decorators (@property, @classmethod, etc.)
	HasDecorators bool

	// ClassName is the containing class name (if method), empty if top-level function
	// Example: "User" for myapp.models.User.save
	ClassName string

	// IsMethod indicates if this is a class method (has self/cls parameter)
	IsMethod bool

	// IsAsync indicates if this is an async function
	IsAsync bool
}

FunctionMetadata contains all information about a function needed for diagnostic analysis.

func ExtractAllFunctions

func ExtractAllFunctions(projectPath string) ([]*FunctionMetadata, error)

ExtractAllFunctions walks a project directory and extracts all Python function definitions. Returns a slice of FunctionMetadata for each function found.

Performance: ~1-2 seconds for 10,000 functions

Example:

functions, err := ExtractAllFunctions("/path/to/project")
if err != nil {
    log.Fatal(err)
}
fmt.Printf("Found %d functions\n", len(functions))

type FunctionTaintResult

type FunctionTaintResult struct {
	// FunctionFQN identifies the function
	FunctionFQN string

	// HasTaintFlow indicates if ANY taint flow was detected (binary result)
	HasTaintFlow bool

	// TaintFlows contains all detected flows (detailed result)
	TaintFlows []ToolTaintFlow

	// AnalysisError indicates if analysis failed
	AnalysisError bool

	// ErrorMessage if AnalysisError == true
	ErrorMessage string
}

FunctionTaintResult represents the structured taint analysis result for a single function. This is the internal API (not user-facing) used for diagnostic comparison.

func AnalyzeSingleFunction

func AnalyzeSingleFunction(
	fn *FunctionMetadata,
	sources []string,
	sinks []string,
	sanitizers []string,
) (*FunctionTaintResult, error)

AnalyzeSingleFunction runs intra-procedural taint analysis on a single function. This wraps existing taint analysis logic but:

 1. Analyzes ONLY the specified function (not whole codebase)
 2. Returns structured result (not text)
 3. Filters to intra-procedural flows only

Performance: ~1-5ms per function (depends on function size)

Example:

result, err := AnalyzeSingleFunction(functionMetadata, sources, sinks, sanitizers)
if err != nil {
    log.Printf("Analysis failed: %v", err)
    return nil, err
}
if result.HasTaintFlow {
    fmt.Printf("Found %d flows\n", len(result.TaintFlows))
}

type LLMAnalysisResult

type LLMAnalysisResult struct {
	// FunctionFQN identifies which function was analyzed
	FunctionFQN string `json:"function_fqn,omitempty"`

	// DiscoveredPatterns contains sources/sinks/sanitizers found by LLM
	DiscoveredPatterns DiscoveredPatterns `json:"discovered_patterns"`

	// DataflowTestCases are test cases generated by LLM
	// Each test case specifies expected dataflow behavior
	DataflowTestCases []DataflowTestCase `json:"dataflow_test_cases"`

	// VariableTracking shows how LLM traced variables through the function
	VariableTracking []VariableTrack `json:"variable_tracking"`

	// Metadata about the analysis
	AnalysisMetadata AnalysisMetadata `json:"analysis_metadata"`
}

LLMAnalysisResult contains the LLM's analysis of a function.

type LLMClient

type LLMClient struct {
	Provider    LLMProvider
	BaseURL     string
	Model       string
	Temperature float64
	MaxTokens   int
	APIKey      string // For OpenAI-compatible APIs (xAI Grok, etc.)
	HTTPClient  *http.Client
}

LLMClient handles communication with LLM providers (Ollama, OpenAI-compatible APIs).

func NewLLMClient

func NewLLMClient(baseURL, model string) *LLMClient

NewLLMClient creates a new LLM client for Ollama. Example:

client := NewLLMClient("http://localhost:11434", "qwen3-coder:32b")

func NewOpenAIClient

func NewOpenAIClient(baseURL, model, apiKey string) *LLMClient

NewOpenAIClient creates a new OpenAI-compatible client (xAI Grok, vLLM, etc.). Example:

client := NewOpenAIClient("https://api.x.ai/v1", "grok-beta", "xai-YOUR_API_KEY")

func (*LLMClient) AnalyzeBatch

func (c *LLMClient) AnalyzeBatch(functions []*FunctionMetadata, concurrency int) (map[string]*LLMAnalysisResult, map[string]error)

AnalyzeBatch analyzes multiple functions in parallel. Returns results map (FQN -> result) and errors map (FQN -> error).

Performance: 4-8 parallel workers, ~30-60 minutes for 10k functions

Example:

client := NewLLMClient("http://localhost:11434", "qwen3-coder:32b")
results, errors := client.AnalyzeBatch(functions, 4)
fmt.Printf("Analyzed %d functions, %d errors\n", len(results), len(errors))

func (*LLMClient) AnalyzeFunction

func (c *LLMClient) AnalyzeFunction(fn *FunctionMetadata) (*LLMAnalysisResult, error)

AnalyzeFunction sends a function to the LLM for pattern discovery and test generation. Returns structured analysis result or error.

Performance: ~2-5 seconds per function (depends on function size)

Example:

client := NewLLMClient("http://localhost:11434", "qwen3-coder:32b")
result, err := client.AnalyzeFunction(functionMetadata)
if err != nil {
    log.Printf("LLM analysis failed: %v", err)
    return nil, err
}
fmt.Printf("Found %d sources, %d sinks, %d test cases\n",
    len(result.DiscoveredPatterns.Sources),
    len(result.DiscoveredPatterns.Sinks),
    len(result.DataflowTestCases))

type LLMProvider

type LLMProvider string

LLMProvider represents the type of LLM provider.

const (
	ProviderOllama LLMProvider = "ollama"
	ProviderOpenAI LLMProvider = "openai" // Also compatible with xAI Grok, vLLM, etc.
)

type MatchConfig

type MatchConfig struct {
	// LineThreshold: Accept matches within ±N lines (default: 2)
	LineThreshold int

	// AllowVariableAliases: Match user_input vs user_input_1 (SSA) (default: true)
	AllowVariableAliases bool

	// SemanticVulnTypes: "SQL_INJECTION" == "sqli" (default: true)
	SemanticVulnTypes bool
}

MatchConfig specifies how lenient fuzzy matching should be.

func DefaultMatchConfig

func DefaultMatchConfig() MatchConfig

DefaultMatchConfig returns default fuzzy matching configuration.
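
A sketch of adjusting the defaults before matching; the threshold value is illustrative:

config := DefaultMatchConfig()
config.LineThreshold = 3 // accept matches within ±3 lines instead of ±2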

type NormalizedTaintFlow

type NormalizedTaintFlow struct {
	SourceLine     int
	SourceVariable string
	SourceCategory string // Semantic: "user_input", "file_read", etc.

	SinkLine     int
	SinkVariable string
	SinkCategory string // Semantic: "sql_execution", "command_exec", etc.

	VulnerabilityType string // "SQL_INJECTION", "XSS", etc.
	Confidence        float64
}

NormalizedTaintFlow is the common format for comparison. Both tool and LLM results are converted to this format.

func NormalizeLLMResult

func NormalizeLLMResult(llmResult *LLMAnalysisResult) []NormalizedTaintFlow

NormalizeLLMResult converts the LLM's test cases to the normalized format.

func NormalizeToolResult

func NormalizeToolResult(toolResult *FunctionTaintResult) []NormalizedTaintFlow

NormalizeToolResult converts our tool's result to the normalized format.
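
A sketch of normalizing both sides before comparison, assuming toolResult and llmResult come from AnalyzeSingleFunction and AnalyzeFunction:

toolFlows := NormalizeToolResult(toolResult)
llmFlows := NormalizeLLMResult(llmResult)
fmt.Printf("%d tool flows vs %d LLM flows to compare\n", len(toolFlows), len(llmFlows))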

type OverallMetrics

type OverallMetrics struct {
	// Total functions analyzed
	TotalFunctions int

	// Confusion Matrix
	TruePositives  int // Tool detected, LLM confirmed ✅
	FalsePositives int // Tool detected, LLM says safe ⚠️
	FalseNegatives int // Tool missed, LLM found vuln ❌
	TrueNegatives  int // Tool skipped, LLM confirmed safe ✅

	// Metrics
	Precision float64 // TP / (TP + FP)
	Recall    float64 // TP / (TP + FN)
	F1Score   float64 // 2 * (P * R) / (P + R)
	Agreement float64 // (TP + TN) / Total

	// Processing stats
	LLMProcessingTime   string
	TotalProcessingTime string
	FunctionsPerSecond  float64

	// Failure breakdown
	FailuresByCategory map[string]int
	TopFailures        []FailureExample
}

OverallMetrics contains aggregated metrics across all functions.

func CalculateOverallMetrics

func CalculateOverallMetrics(
	comparisons []*DualLevelComparison,
	startTime time.Time,
) *OverallMetrics

CalculateOverallMetrics aggregates metrics from all function comparisons.

Performance: O(n) where n = number of comparisons

Example:

metrics := CalculateOverallMetrics(comparisons, startTime)
fmt.Printf("Precision: %.1f%%\n", metrics.Precision*100)
fmt.Printf("Recall: %.1f%%\n", metrics.Recall*100)
fmt.Printf("F1 Score: %.1f%%\n", metrics.F1Score*100)

type PatternLocation

type PatternLocation struct {
	// Pattern is the code pattern (e.g., "request.GET", "os.system")
	Pattern string `json:"pattern"`

	// Lines where this pattern appears
	Lines []int `json:"lines"`

	// Variables involved
	Variables []string `json:"variables"`

	// Category for semantic grouping
	// Examples: "user_input", "file_read", "sql_execution", "command_exec"
	Category string `json:"category"`

	// Description of what this pattern does
	Description string `json:"description"`

	// Severity (for sinks): CRITICAL, HIGH, MEDIUM, LOW
	Severity string `json:"severity,omitempty"`
}

PatternLocation describes where a pattern (source/sink/sanitizer) was found.

type PropagatorOperation

type PropagatorOperation struct {
	// Type: "assignment", "function_call", "return"
	Type string `json:"type"`

	// Line number
	Line int `json:"line"`

	// Source variable
	FromVar string `json:"from_var"`

	// Destination variable
	ToVar string `json:"to_var"`

	// Function name (if Type == "function_call")
	Function string `json:"function,omitempty"`
}

PropagatorOperation describes how data propagates.

type TestCaseSink

type TestCaseSink struct {
	Pattern  string `json:"pattern"` // e.g., "os.system"
	Line     int    `json:"line"`
	Variable string `json:"variable"`
}

TestCaseSink describes the sink in a test case.

type TestCaseSource

type TestCaseSource struct {
	Pattern  string `json:"pattern"` // e.g., "request.GET['cmd']"
	Line     int    `json:"line"`
	Variable string `json:"variable"`
}

TestCaseSource describes the source in a test case.

type ToolTaintFlow

type ToolTaintFlow struct {
	// Source information
	SourceLine     int
	SourceVariable string
	SourceType     string // e.g., "request.GET['username']"
	SourceCategory string // e.g., "user_input" (semantic)

	// Sink information
	SinkLine     int
	SinkVariable string
	SinkType     string // e.g., "sqlite3.execute"
	SinkCategory string // e.g., "sql_execution" (semantic)

	// Flow details
	FlowPath []FlowStep

	// Metadata
	VulnerabilityType string  // e.g., "SQL_INJECTION"
	Confidence        float64 // 0.0-1.0
	IsSanitized       bool    // If sanitizer detected in path
}

ToolTaintFlow represents a single taint flow detected by our tool.
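
A sketch of printing detected flows, assuming result is a *FunctionTaintResult from AnalyzeSingleFunction:

for _, flow := range result.TaintFlows {
    fmt.Printf("%s: line %d (%s) -> line %d (%s), sanitized=%v\n",
        flow.VulnerabilityType, flow.SourceLine, flow.SourceType,
        flow.SinkLine, flow.SinkType, flow.IsSanitized)
}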

type VariableTrack

type VariableTrack struct {
	Variable     string   `json:"variable"`
	FirstDefined int      `json:"first_defined"`
	LastUsed     int      `json:"last_used"`
	Aliases      []string `json:"aliases"` // Other variable names that hold the same data
	FlowsToLines []int    `json:"flows_to_lines"`
	FlowsToVars  []string `json:"flows_to_vars"`
}

VariableTrack shows how the LLM traced a variable through the function.
