discovery

package
v0.1.3 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: May 27, 2026 License: GPL-3.0 Imports: 16 Imported by: 0

Documentation

Overview

Package discovery provides the carrier map builder and its serialization.

Package discovery provides auto-discovery of input carriers by tracing PHP superglobals.

Package discovery provides the taint propagation engine.

Index

Constants

This section is empty.

Variables

View Source
// PHPSuperglobals is initialized once, at package initialization, with the
// value returned by sources.SuperglobalNames(), so this package shares the
// list maintained in the sources package instead of keeping its own copy.
var PHPSuperglobals = sources.SuperglobalNames()

PHPSuperglobals references the centralized list from pkg/sources

Functions

func GroupByClass

func GroupByClass(usages []SuperglobalUsage) map[string][]SuperglobalUsage

GroupByClass groups usages by class name

func GroupBySuperglobal

func GroupBySuperglobal(usages []SuperglobalUsage) map[string][]SuperglobalUsage

GroupBySuperglobal groups usages by superglobal type

Types

type CarrierMap

// CarrierMap stores the input carriers discovered for a codebase, along with
// the codebase path, a discovery timestamp and summary statistics.
//
// Every field is serialized to JSON under the snake_case key given in its
// struct tag. PHPVersion and Framework carry `omitempty`, so they are left
// out of the JSON output when they are empty strings.
type CarrierMap struct {
	CodebasePath string         `json:"codebase_path"`
	DiscoveredAt time.Time      `json:"discovered_at"`
	PHPVersion   string         `json:"php_version,omitempty"`
	Framework    string         `json:"framework,omitempty"`
	Carriers     []InputCarrier `json:"carriers"`
	Statistics   CarrierStats   `json:"statistics"`
}

CarrierMap stores discovered input carriers for a codebase

func BuildCarrierMap

func BuildCarrierMap(codebasePath string) (*CarrierMap, error)

BuildCarrierMap analyzes a codebase and builds a carrier map

func LoadCarrierMap

func LoadCarrierMap(path string) (*CarrierMap, error)

LoadCarrierMap loads a carrier map from a JSON file

func (*CarrierMap) FindCarrier

func (m *CarrierMap) FindCarrier(className, name string) *InputCarrier

FindCarrier looks up a carrier by class and property/method name

func (*CarrierMap) FindCarriersByClass

func (m *CarrierMap) FindCarriersByClass(className string) []InputCarrier

FindCarriersByClass returns all carriers for a given class

func (*CarrierMap) FindCarriersBySourceType

func (m *CarrierMap) FindCarriersBySourceType(sourceType string) []InputCarrier

FindCarriersBySourceType returns all carriers that have a specific source type

func (*CarrierMap) GetAllClassNames

func (m *CarrierMap) GetAllClassNames() []string

GetAllClassNames returns unique class names that have carriers

func (*CarrierMap) GetAllSourceTypes

func (m *CarrierMap) GetAllSourceTypes() []string

GetAllSourceTypes returns unique source types found

func (*CarrierMap) Merge

func (m *CarrierMap) Merge(other *CarrierMap)

Merge merges another carrier map into this one

func (*CarrierMap) SaveToFile

func (m *CarrierMap) SaveToFile(path string) error

SaveToFile saves the carrier map to a JSON file

func (*CarrierMap) Summary

func (m *CarrierMap) Summary() string

Summary returns a human-readable summary of the carrier map

type CarrierStats

// CarrierStats holds statistics about a discovery: four integer totals and
// two string-keyed count maps, all serialized to JSON under the keys in the
// struct tags.
//
// NOTE(review): BySourceType and ByClassName are presumably keyed by
// superglobal name and by class name respectively — confirm against the code
// that populates them. The maps have no omitempty tag, so a nil map is
// encoded as JSON null.
type CarrierStats struct {
	TotalSuperglobalUsages int            `json:"total_superglobal_usages"`
	UniqueCarriers         int            `json:"unique_carriers"`
	TotalTaintFlows        int            `json:"total_taint_flows"`
	ClassesAnalyzed        int            `json:"classes_analyzed"`
	BySourceType           map[string]int `json:"by_source_type"`
	ByClassName            map[string]int `json:"by_class_name"`
}

CarrierStats holds statistics about the discovery

type ClassInfo

// ClassInfo stores information about a class: its name, the file it was found
// in, its properties and methods, its constructor, its parent class and the
// names listed in Implements.
//
// Properties and Methods are maps of pointers, so entries are shared with
// (not copied from) whoever else holds the same *PropertyInfo / *MethodInfo.
// NOTE(review): the map keys are presumably the property/method names —
// confirm against the code that fills them.
//
// The struct has no JSON tags; it is not part of the serialized CarrierMap.
type ClassInfo struct {
	Name        string
	FilePath    string
	Properties  map[string]*PropertyInfo
	Methods     map[string]*MethodInfo
	Constructor *MethodInfo
	ParentClass string
	Implements  []string
}

ClassInfo stores information about a class

type InputCarrier

// InputCarrier represents a discovered class property or method that carries
// user input.
//
// A carrier is either property-based or method-based: PropertyName is empty
// for method-based carriers and MethodName is empty for property-based ones.
// Both fields carry `omitempty`, so the empty one is dropped from the JSON
// output. The remaining fields are always emitted under the keys in their
// struct tags.
type InputCarrier struct {
	ClassName     string   `json:"class_name"`
	PropertyName  string   `json:"property_name,omitempty"` // Empty if method-based
	MethodName    string   `json:"method_name,omitempty"`   // Empty if property-based
	SourceTypes   []string `json:"source_types"`            // ["$_GET", "$_POST"]
	AccessPattern string   `json:"access_pattern"`          // "array", "method", "direct"
	PopulatedIn   string   `json:"populated_in"`            // Constructor or method name that populates it
	FilePath      string   `json:"file_path"`
	Line          int      `json:"line"`
}

InputCarrier represents a discovered class property or method that carries user input

type MethodInfo

// MethodInfo stores information about a class method: its name, visibility
// and static flags, line number, parameter list and taint-related results.
//
// To limit memory use, BodySource is only populated for methods that need
// deep analysis; it is not stored for every method.
//
// NOTE(review): IsPublic, IsProtected and IsPrivate are three independent
// booleans; nothing in this declaration enforces that exactly one is set.
type MethodInfo struct {
	Name           string
	IsPublic       bool
	IsProtected    bool
	IsPrivate      bool
	IsStatic       bool
	Line           int
	Parameters     []string
	ReturnsTainted bool
	TaintSources   []string // Superglobals that can flow through this method
	BodySource     string   // Only populated for methods that need deep analysis
	// contains filtered or unexported fields
}

MethodInfo stores information about a class method. It is optimized to avoid storing the full body source when not needed.

func (*MethodInfo) ReleaseBodySource

func (m *MethodInfo) ReleaseBodySource()

ReleaseBodySource releases the body source to free memory. Call this after pattern analysis is complete.

type PropertyInfo

// PropertyInfo stores information about a class property: its name,
// visibility and static flags, default value, line number and the
// superglobals that flow into it.
//
// NOTE(review): IsPublic, IsProtected and IsPrivate are three independent
// booleans; nothing in this declaration enforces that exactly one is set.
// DefaultValue is a plain string — presumably the default as written in the
// PHP source; confirm against the extractor.
type PropertyInfo struct {
	Name         string
	IsPublic     bool
	IsProtected  bool
	IsPrivate    bool
	IsStatic     bool
	DefaultValue string
	Line         int
	TaintSources []string // Superglobals that flow into this property
}

PropertyInfo stores information about a class property

type SuperglobalFinder

// SuperglobalFinder finds all superglobal usages in a codebase.
//
// All of its fields are unexported (and not shown in this listing), so
// callers cannot configure it through a struct literal.
type SuperglobalFinder struct {
	// contains filtered or unexported fields
}

SuperglobalFinder finds all superglobal usages in a codebase

func NewSuperglobalFinder

func NewSuperglobalFinder() *SuperglobalFinder

NewSuperglobalFinder creates a new finder instance

func (*SuperglobalFinder) FindAll

func (f *SuperglobalFinder) FindAll(codebasePath string) ([]SuperglobalUsage, error)

FindAll scans a codebase and returns all superglobal usages (parallelized)

type SuperglobalUsage

// SuperglobalUsage tracks where a superglobal is used in the codebase.
//
// Each value records one usage: which superglobal and key were accessed, the
// source position (file, line, column), what the value was assigned to, the
// syntactic context, the enclosing class/method, and the code line itself.
// The three Loop* fields describe a foreach over the superglobal and are
// meaningful when IsLoopVar is true.
//
// No field carries `omitempty`, so every key in the struct tags is always
// present in the JSON output, with zero values for fields that do not apply.
type SuperglobalUsage struct {
	Superglobal  string `json:"superglobal"` // "$_GET", "$_POST", etc.
	Key          string `json:"key"`         // The accessed key, e.g., "id" from $_GET['id'], or "*" for all keys
	FilePath     string `json:"file_path"`
	Line         int    `json:"line"`
	Column       int    `json:"column"`
	AssignedTo   string `json:"assigned_to"`    // Variable or property it's assigned to
	Context      string `json:"context"`        // "assignment", "function_arg", "method_arg", "return", "foreach", "direct"
	ClassName    string `json:"class_name"`     // If inside a class method
	MethodName   string `json:"method_name"`    // If inside a method/function
	CalledMethod string `json:"called_method"`  // Method being called when context is "function_arg" (e.g., "parse_incoming")
	CodeSnippet  string `json:"code_snippet"`   // The actual code line
	IsLoopVar    bool   `json:"is_loop_var"`    // True if used as foreach source
	LoopKeyVar   string `json:"loop_key_var"`   // The key variable in foreach
	LoopValVar   string `json:"loop_value_var"` // The value variable in foreach
}

SuperglobalUsage tracks where a superglobal is used in the codebase

func FilterByClass

func FilterByClass(usages []SuperglobalUsage, className string) []SuperglobalUsage

FilterByClass filters usages by class name

func FilterByContext

func FilterByContext(usages []SuperglobalUsage, context string) []SuperglobalUsage

FilterByContext filters usages by context type

type TaintFlow

// TaintFlow represents one propagation path from a source to a sink.
//
// Source and Sink are embedded by value (not by pointer), so each flow owns
// its own copy of both. FilePath and Line give the position recorded for the
// flow, and FlowType names the kind of propagation using one of the strings
// listed on the field.
type TaintFlow struct {
	Source   TaintSource `json:"source"`
	Sink     TaintSink   `json:"sink"`
	FilePath string      `json:"file_path"`
	Line     int         `json:"line"`
	FlowType string      `json:"flow_type"` // "direct_assignment", "foreach_population", "method_return", "parameter_pass"
}

TaintFlow represents one propagation path from source to sink

type TaintPropagator

// TaintPropagator traces data flow from superglobals to class
// properties/methods.
//
// All of its fields are unexported (and not shown in this listing), so its
// state is reachable only through its methods.
type TaintPropagator struct {
	// contains filtered or unexported fields
}

TaintPropagator traces data flow from superglobals to class properties/methods

func NewTaintPropagator

func NewTaintPropagator() *TaintPropagator

NewTaintPropagator creates a new taint propagation engine

func (*TaintPropagator) AnalyzeCodebase

func (p *TaintPropagator) AnalyzeCodebase(codebasePath string) error

AnalyzeCodebase analyzes a codebase to discover input carriers. It is memory-optimized to release ASTs and method bodies after extraction.

func (*TaintPropagator) Clear

func (p *TaintPropagator) Clear()

Clear releases all memory held by the propagator

func (*TaintPropagator) GetCarriers

func (p *TaintPropagator) GetCarriers() []InputCarrier

GetCarriers returns all discovered input carriers

func (*TaintPropagator) GetClassInfo

func (p *TaintPropagator) GetClassInfo() map[string]*ClassInfo

GetClassInfo returns class information

func (*TaintPropagator) GetFlows

func (p *TaintPropagator) GetFlows() []TaintFlow

GetFlows returns all discovered taint flows

type TaintSink

// TaintSink represents where tainted data ends up.
//
// Type selects the kind of sink using one of the strings listed on the field.
// ClassName applies to property and method sinks, Name holds the property or
// method name, IsPublic records whether the sink can be accessed externally,
// and AccessMethod describes how the sink is read ("array", "method" or
// "direct", as illustrated on the field).
type TaintSink struct {
	Type         string `json:"type"`          // "property", "method", "variable", "function", "array_element"
	ClassName    string `json:"class_name"`    // For properties/methods
	Name         string `json:"name"`          // Property name or method name
	IsPublic     bool   `json:"is_public"`     // Can be accessed externally
	AccessMethod string `json:"access_method"` // "array" for $obj->prop['key'], "method" for $obj->method('key'), "direct" for $obj->prop
}

TaintSink represents where tainted data ends up

type TaintSource

// TaintSource represents the origin of tainted data.
//
// Type selects the kind of source using one of the strings listed on the
// field, Name identifies it (for example a superglobal name), and Keys lists
// the keys accessed, with ["*"] standing for all keys. FilePath and Line give
// the position recorded for the source.
//
// Keys has no omitempty tag, so a nil slice is encoded as JSON null.
type TaintSource struct {
	Type     string   `json:"type"` // "superglobal", "function_return", "property"
	Name     string   `json:"name"` // "$_GET", "$_POST", etc.
	Keys     []string `json:"keys"` // Keys accessed, or ["*"] for all keys
	FilePath string   `json:"file_path"`
	Line     int      `json:"line"`
}

TaintSource represents the origin of tainted data

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL