Documentation
¶
Overview ¶
Package common - framework_patterns.go provides framework pattern definitions. All framework-specific patterns should be defined using these types.
Package common - regex_patterns.go provides pre-compiled regex patterns for input detection. These patterns are used across semantic analyzers for consistent pattern matching.
Package common - source_types.go provides centralized SourceType definitions. SourceType is now a type alias pointing to the canonical definition in pkg/sources/core.
Index ¶
- Constants
- Variables
- func GetAllFrameworkDetectors() map[string]*FrameworkDetector
- func GetOrCompileRegex(pattern string) (*regexp.Regexp, error)
- func IsKeyword(word string) bool
- func IsKeywordForLanguage(word, lang string) bool
- func IsValidSourceType(s string) bool
- func RegisterFrameworkDetector(detector *FrameworkDetector)
- type BaseMatcher
- type Definition
- type FrameworkDetector
- type FrameworkPattern
- type FrameworkPatternRegistry
- func (r *FrameworkPatternRegistry) Count() int
- func (r *FrameworkPatternRegistry) GetAll() []*FrameworkPattern
- func (r *FrameworkPatternRegistry) GetByFramework(framework string) []*FrameworkPattern
- func (r *FrameworkPatternRegistry) GetByID(id string) *FrameworkPattern
- func (r *FrameworkPatternRegistry) GetFrameworks() []string
- func (r *FrameworkPatternRegistry) Register(pattern *FrameworkPattern)
- func (r *FrameworkPatternRegistry) RegisterAll(patterns []*FrameworkPattern)
- type InputLabel
- type InputPattern
- type Match
- type Matcher
- type SourceType
Constants ¶
const ( SourceHTTPGet = core.SourceHTTPGet // Query string parameters SourceHTTPPost = core.SourceHTTPPost // POST form data SourceHTTPBody = core.SourceHTTPBody // Raw request body SourceHTTPJSON = core.SourceHTTPJSON // JSON request body SourceHTTPHeader = core.SourceHTTPHeader // HTTP headers SourceHTTPCookie = core.SourceHTTPCookie // Cookies SourceHTTPPath = core.SourceHTTPPath // URL path parameters SourceHTTPFile = core.SourceHTTPFile // Uploaded files ($_FILES) SourceHTTPRequest = core.SourceHTTPRequest // Combined GET/POST ($_REQUEST) SourceSession = core.SourceSession // Session data ($_SESSION) SourceCLIArg = core.SourceCLIArg // Command line arguments SourceEnvVar = core.SourceEnvVar // Environment variables SourceStdin = core.SourceStdin // Standard input SourceFile = core.SourceFile // File reads SourceDatabase = core.SourceDatabase // Database query results SourceNetwork = core.SourceNetwork // Network/socket reads SourceUserInput = core.SourceUserInput // Generic user input SourceUnknown = core.SourceUnknown // Unknown source type )
Re-export SourceType constants from core for backward compatibility.
const ( LabelHTTPGet = core.LabelHTTPGet LabelHTTPPost = core.LabelHTTPPost LabelHTTPCookie = core.LabelHTTPCookie LabelHTTPHeader = core.LabelHTTPHeader LabelHTTPBody = core.LabelHTTPBody LabelCLI = core.LabelCLI LabelEnvironment = core.LabelEnvironment LabelFile = core.LabelFile LabelDatabase = core.LabelDatabase LabelNetwork = core.LabelNetwork LabelUserInput = core.LabelUserInput )
Re-export InputLabel constants from core for backward compatibility.
Variables ¶
// Pre-compiled PHP superglobal patterns.
var (
	// SuperglobalPattern matches PHP superglobal array access with either a
	// literal or a variable key,
	// e.g., $_GET['key'], $_POST["key"], $_REQUEST[$var].
	//
	// Submatch 1 is the superglobal name without the "$_" prefix. Submatch 2
	// is the key; a variable key keeps its leading "$" (e.g. "$var") so that
	// callers can tell it apart from a literal key. The previous key class
	// rejected "$", which meant the documented $_REQUEST[$var] form was never
	// matched.
	SuperglobalPattern = regexp.MustCompile(`\$_(GET|POST|COOKIE|REQUEST|SERVER|FILES|SESSION|ENV)\[['"]?(\$?[\w\-]+)['"]?\]`)

	// SuperglobalForeachPattern matches PHP foreach over superglobals in the
	// key/value form,
	// e.g., foreach ($_GET as $key => $value).
	//
	// Submatch 1 is the iterated variable including "$_", submatch 2 the key
	// variable name and submatch 3 the value variable name (both without "$").
	SuperglobalForeachPattern = regexp.MustCompile(`foreach\s*\(\s*(\$_\w+)\s+as\s+\$(\w+)\s*=>\s*\$(\w+)\s*\)`)

	// SuperglobalSimplePattern matches just the superglobal name,
	// e.g., $_GET, $_POST (without array access).
	//
	// Submatch 1 is the name without the "$_" prefix. The pattern is not
	// anchored, so it also matches when the name is a prefix of a longer
	// identifier.
	SuperglobalSimplePattern = regexp.MustCompile(`\$_(GET|POST|COOKIE|REQUEST|SERVER|FILES|SESSION|ENV)`)
)
Pre-compiled PHP superglobal patterns
// Pre-compiled method/property patterns.
var (
	// MethodCallPattern matches object method calls up to the opening paren,
	// e.g., $obj->method(, $request->input(.
	// Submatch 1 is the object variable name, submatch 2 the method name.
	MethodCallPattern = regexp.MustCompile(`\$(\w+)->(\w+)\s*\(`)

	// MethodCallWithArgsPattern matches method calls whose first argument is
	// a quoted string, optionally followed by further arguments,
	// e.g., $var->method('arg'), $var->method("arg").
	// Submatches: 1 object, 2 method, 3 contents of the first string argument.
	MethodCallWithArgsPattern = regexp.MustCompile(`\$(\w+)->(\w+)\s*\(\s*['"]([^'"]*)['"]\s*(?:,\s*[^)]+)?\s*\)`)

	// PropertyArrayPattern matches property with array access,
	// e.g., $obj->data['key'], $request->query["param"].
	// Submatches: 1 object, 2 property, 3 key.
	PropertyArrayPattern = regexp.MustCompile(`\$(\w+)->(\w+)\[['"]?([\w\-]+)['"]?\]`)

	// SimplePropertyPattern matches simple property access (no array, no
	// method call),
	// e.g., $obj->data, $request->body.
	// Submatches: 1 object, 2 property.
	//
	// The character after the property name (skipping optional whitespace)
	// must not be a word character, "[" or "(". Excluding word characters is
	// what stops the engine from shortening the name by one character to
	// satisfy the trailing class; without it "$request->input(" matched with
	// the property reported as "inpu". Excluding whitespace from the class
	// keeps "$obj->method (" from matching on the space.
	SimplePropertyPattern = regexp.MustCompile(`\$(\w+)->(\w+)\s*(?:[^\w\s\[\(]|$)`)
)
Pre-compiled method/property patterns
// InputMethodPattern recognises framework-agnostic PHP method calls that read
// request input. It is case-insensitive, accepts an optional "get" / "get_"
// prefix and an optional trailing "s", and requires the opening paren,
// e.g., ->get_input(, ->getInput(, ->input(.
var InputMethodPattern = regexp.MustCompile(`(?i)->(?:get_?)?(?:input|var|variable|query_?params?|parsed_?body|cookie_?params?|server_?params?|uploaded_?files?|headers?|all|post|cookie|param)s?\s*\(`)

// InputPropertyPattern recognises framework-agnostic PHP property names that
// are indexed like an array of request input. It is case-insensitive and
// requires the opening bracket directly after the name,
// e.g., ->input[, ->data[, ->request[.
var InputPropertyPattern = regexp.MustCompile(`(?i)->(?:input|request|params?|query|cookies?|headers?|body|data|args?|post|get|files?|server|attributes?|payload)s?\[`)

// InputObjectPattern recognises names of objects that typically carry user
// input, e.g., $request, $input, $ctx, $context. It is case-insensitive and
// unanchored, so it matches anywhere inside the text it is given.
//
// NOTE: This is GENERIC - framework-specific objects are detected separately.
var InputObjectPattern = regexp.MustCompile(`(?i)(request|input|req|params?|http|ctx|context|getRequest\(\)|getApplication\(\))`)
Pre-compiled input method patterns. NOTE: These are UNIVERSAL patterns that work across any framework. Framework-specific patterns (MyBB, phpBB, WordPress, etc.) should be defined in pkg/sources/{language}/{framework}.go.
// ContextDependentMethodPattern matches method calls that may or may not be
// input getters, so the caller needs surrounding context to decide. It is
// case-insensitive and accepts an optional "get" / "get_" prefix in front of
// one of: val, text, int, bool, array, raw_val / rawval, check
// (e.g., ->get_val(, ->getInt(). A bare ->get( is not matched by this pattern.
var ContextDependentMethodPattern = regexp.MustCompile(`(?i)->(?:get_?)?(?:val|text|int|bool|array|raw_?val|check)\s*\(`)

// ExcludeMethodPattern matches method calls that look like input access but
// are usually generic getters or loaders rather than user input,
// e.g., ->getData(, ->fetch(, ->load(. Matching is case-insensitive.
var ExcludeMethodPattern = regexp.MustCompile(`(?i)->(?:getData|getBody|getContent|fetch|find|load|read)\s*\(`)
Pre-compiled context-dependent method patterns (may or may not indicate input)
// JSRequestPattern matches access to the usual input-bearing properties of a
// JavaScript/TypeScript request object named req, request or ctx,
// e.g., req.body, req.query, req.params, request.body.
var JSRequestPattern = regexp.MustCompile(`(?:req|request|ctx)\.(?:body|query|params|cookies|headers|files?)`)

// JSPropertyAccessPattern matches a dotted property chain of two or three
// identifiers that may turn out to be input, e.g., ctx.request.body,
// event.body. Submatches 1-3 are the identifiers in order; submatch 3 is
// empty when the chain has only two parts.
var JSPropertyAccessPattern = regexp.MustCompile(`\b(\w+)\.(\w+)(?:\.(\w+))?`)
Pre-compiled JavaScript/TypeScript patterns
// PythonFlaskPattern matches Flask-style attribute access on an object named
// request, e.g., request.form, request.args, request.json.
var PythonFlaskPattern = regexp.MustCompile(`request\.(?:form|args|json|data|values|files|cookies|headers)`)

// PythonDjangoPattern matches Django-style attribute access on an object
// named request, e.g., request.GET, request.POST, request.COOKIES.
var PythonDjangoPattern = regexp.MustCompile(`request\.(?:GET|POST|COOKIES|META|FILES|body|data)`)

// PythonArgparsePattern matches attribute access on a parsed-arguments object
// named args, e.g., args.username, args.verbose. Submatch 1 is the attribute
// name.
var PythonArgparsePattern = regexp.MustCompile(`args\.(\w+)`)
Pre-compiled Python patterns
// GoRequestPattern matches access to input-bearing members of a Go
// http.Request held in a variable named r, req or request,
// e.g., r.URL.Query(), r.FormValue(, r.Header.Get(.
var GoRequestPattern = regexp.MustCompile(`(?:r|req|request)\.(?:URL\.Query|FormValue|Header|Cookie|Body|Form|PostForm)`)

// GoGinPattern matches Gin-style input accessors on a context variable named
// c, e.g., c.Query(, c.Param(, c.PostForm(.
var GoGinPattern = regexp.MustCompile(`c\.(?:Query|Param|PostForm|GetHeader|Cookie|ShouldBind)`)

// GoEchoPattern matches Echo-style input accessors on a context variable
// named c, e.g., c.QueryParam(, c.Param(, c.FormValue(.
var GoEchoPattern = regexp.MustCompile(`c\.(?:QueryParam|Param|FormValue|Request)`)
Pre-compiled Go patterns
// Pre-compiled Java patterns.
var (
	// JavaServletPattern matches HttpServletRequest-style getter calls on a
	// variable named request, req or httpRequest,
	// e.g., request.getParameter(, request.getHeader(, request.getCookies(.
	//
	// The cookie alternative accepts both getCookie( and getCookies(: the
	// servlet API exposes cookies through getCookies(), which the previous
	// singular-only alternative never matched.
	JavaServletPattern = regexp.MustCompile(`(?:request|req|httpRequest)\.get(?:Parameter|Header|Cookies?|Attribute|Session)\s*\(`)

	// JavaSpringAnnotationPattern matches Spring annotations that bind
	// request data to handler parameters,
	// e.g., @RequestParam, @PathVariable, @RequestBody.
	JavaSpringAnnotationPattern = regexp.MustCompile(`@(?:RequestParam|PathVariable|RequestBody|RequestHeader|CookieValue|ModelAttribute|RequestPart|MatrixVariable)`)
)
Pre-compiled Java patterns
// CArgvPattern matches indexing into the C main function's argv with either a
// numeric or an identifier index, e.g., argv[1], argv[i]. Submatch 1 is the
// index expression.
var CArgvPattern = regexp.MustCompile(`argv\s*\[\s*(\d+|\w+)\s*\]`)

// CEnvPattern matches C environment access: a getenv call or indexing into
// envp / environ, e.g., getenv("PATH"), envp[0].
var CEnvPattern = regexp.MustCompile(`(?:getenv\s*\(|envp\s*\[|environ\s*\[)`)

// CStdinPattern matches calls to the C input-reading functions scanf, gets,
// fgets, getchar, getc and getline, e.g., scanf(, gets(, fgets(stdin.
// The pattern has no word boundary, so a longer function name that ends in
// one of these names is matched as well.
var CStdinPattern = regexp.MustCompile(`(?:scanf|gets|fgets|getchar|getc|getline)\s*\(`)
Pre-compiled C/C++ patterns
// AllSourceTypes lists all valid source types for iteration/validation.
// It is assigned directly from core.AllSourceTypes rather than being defined
// here.
var AllSourceTypes = core.AllSourceTypes
AllSourceTypes lists all valid source types for iteration/validation.
var LanguageKeywords = map[string][]string{
"php": {
"isset", "empty", "unset", "echo", "print",
"is_null", "is_string", "is_array", "is_int", "is_bool", "is_float",
"array", "list", "global", "static",
},
"python": {
"None", "True", "False", "and", "or", "not", "is", "in",
"lambda", "pass", "with", "as", "global", "nonlocal",
"assert", "raise", "except", "from",
},
"javascript": {
"undefined", "null", "true", "false", "typeof", "instanceof",
"arguments", "debugger", "delete", "void", "with",
"NaN", "Infinity",
},
"typescript": {
"undefined", "null", "true", "false", "typeof", "instanceof",
"arguments", "debugger", "delete", "void", "with",
"NaN", "Infinity", "keyof", "readonly", "infer", "never",
},
"go": {
"nil", "true", "false", "iota",
"make", "new", "append", "copy", "delete", "len", "cap",
"panic", "recover", "defer", "go", "select", "chan",
"range", "fallthrough", "goto", "map", "struct",
},
"java": {
"null", "true", "false", "instanceof",
"extends", "implements", "throws", "native", "synchronized",
"volatile", "transient", "strictfp", "assert",
},
"c": {
"NULL", "sizeof", "typedef", "extern", "register", "volatile",
"inline", "restrict", "auto", "goto",
},
"cpp": {
"NULL", "nullptr", "sizeof", "typedef", "extern", "register",
"volatile", "inline", "restrict", "auto", "goto",
"template", "typename", "virtual", "override", "delete",
"constexpr", "noexcept", "decltype", "static_cast", "dynamic_cast",
},
"c_sharp": {
"null", "true", "false", "typeof", "sizeof", "nameof",
"checked", "unchecked", "lock", "fixed", "stackalloc",
"internal", "sealed", "virtual", "override", "readonly",
"ref", "out", "in", "params", "where", "default",
},
"ruby": {
"nil", "true", "false", "self", "super",
"defined?", "begin", "end", "rescue", "ensure", "raise",
"alias", "undef", "redo", "retry", "yield",
},
"rust": {
"true", "false", "self", "Self", "super", "crate",
"move", "ref", "box", "dyn", "where", "unsafe",
"extern", "mod", "pub", "priv", "loop", "match",
},
}
LanguageKeywords provides language-specific keyword lists
// PHPInputPatterns is the ordered list of generic PHP input-access patterns,
// each pairing a compiled regex with the human-readable name of the source it
// detects. Entries are grouped as: superglobal array access, calls/properties
// on a variable named $request, and getter calls on any object.
var PHPInputPatterns = []InputPattern{
	// Superglobal array access.
	{Name: "$_GET", Regex: regexp.MustCompile(`\$_GET\[`)},
	{Name: "$_POST", Regex: regexp.MustCompile(`\$_POST\[`)},
	{Name: "$_REQUEST", Regex: regexp.MustCompile(`\$_REQUEST\[`)},
	{Name: "$_COOKIE", Regex: regexp.MustCompile(`\$_COOKIE\[`)},
	{Name: "$_FILES", Regex: regexp.MustCompile(`\$_FILES\[`)},
	{Name: "$_SERVER", Regex: regexp.MustCompile(`\$_SERVER\[`)},

	// Access through a variable named $request.
	{Name: "$request->input()", Regex: regexp.MustCompile(`\$request->input\(`)},
	{Name: "$request->get()", Regex: regexp.MustCompile(`\$request->get\(`)},
	{Name: "$request->post()", Regex: regexp.MustCompile(`\$request->post\(`)},
	{Name: "$request->query", Regex: regexp.MustCompile(`\$request->query\[`)},
	{Name: "$request->all()", Regex: regexp.MustCompile(`\$request->all\(`)},

	// Getter calls on any receiver.
	{Name: "->getQueryParams()", Regex: regexp.MustCompile(`->getQueryParams\(`)},
	{Name: "->getParsedBody()", Regex: regexp.MustCompile(`->getParsedBody\(`)},
	{Name: "->getCookieParams()", Regex: regexp.MustCompile(`->getCookieParams\(`)},
}
PHPInputPatterns contains pre-compiled patterns for PHP input access. Used for detecting input sources in expressions without a carrier map.
NOTE: These are GENERIC patterns. Framework-specific patterns (MyBB, WordPress, etc.) are defined in pkg/sources/php/{framework}.go and registered separately.
// UniversalKeywords is the set of words that must be filtered out during
// variable extraction. They are keywords and literals shared by many
// languages and must not be treated as variable names. Every listed word maps
// to true; any other word yields the zero value false on lookup.
var UniversalKeywords = map[string]bool{
	// Branching.
	"if":      true,
	"else":    true,
	"elseif":  true,
	"elif":    true,
	"switch":  true,
	"case":    true,
	"default": true,

	// Loops and jumps.
	"for":      true,
	"while":    true,
	"do":       true,
	"foreach":  true,
	"break":    true,
	"continue": true,
	"return":   true,

	// Exception handling.
	"try":     true,
	"catch":   true,
	"finally": true,
	"throw":   true,

	// Literals.
	"true":      true,
	"false":     true,
	"null":      true,
	"nil":       true,
	"undefined": true,
	"True":      true,
	"False":     true,
	"None":      true,

	// Word operators.
	"and": true,
	"or":  true,
	"not": true,
	"is":  true,
	"in":  true,

	// Function declarations.
	"function": true,
	"func":     true,
	"def":      true,
	"fn":       true,

	// Variable declarations.
	"var":   true,
	"let":   true,
	"const": true,
	"mut":   true,

	// Type declarations.
	"class":     true,
	"interface": true,
	"struct":    true,
	"enum":      true,
	"type":      true,
	"impl":      true,
	"trait":     true,

	// Modifiers.
	"public":    true,
	"private":   true,
	"protected": true,
	"static":    true,
	"final":     true,
	"abstract":  true,

	// Asynchrony and generators.
	"async": true,
	"await": true,
	"yield": true,

	// Object keywords.
	"new":   true,
	"this":  true,
	"self":  true,
	"super": true,

	// Type operators.
	"instanceof": true,
	"typeof":     true,

	// PHP constructs.
	"isset": true,
	"empty": true,
	"unset": true,
	"echo":  true,
	"print": true,

	// Modules and namespaces.
	"import":    true,
	"export":    true,
	"require":   true,
	"include":   true,
	"use":       true,
	"namespace": true,
	"package":   true,
	"module":    true,
}
UniversalKeywords contains keywords to filter from variable extraction. These are common across most languages and should not be treated as variable names.
Functions ¶
func GetAllFrameworkDetectors ¶
func GetAllFrameworkDetectors() map[string]*FrameworkDetector
GetAllFrameworkDetectors returns all registered detectors
func GetOrCompileRegex ¶
GetOrCompileRegex returns a cached or newly compiled regex
func IsKeywordForLanguage ¶
IsKeywordForLanguage checks if a word is a keyword for a specific language
func IsValidSourceType ¶
IsValidSourceType checks if a string is a valid SourceType.
func RegisterFrameworkDetector ¶
func RegisterFrameworkDetector(detector *FrameworkDetector)
RegisterFrameworkDetector registers a framework detector
Types ¶
type BaseMatcher ¶
type BaseMatcher struct {
// contains filtered or unexported fields
}
BaseMatcher provides common functionality for source matching
func NewBaseMatcher ¶
func NewBaseMatcher(language string, sources []Definition) *BaseMatcher
NewBaseMatcher creates a new base matcher and pre-compiles all regex patterns. It panics if any Pattern, ExcludePattern, or KeyExtractor field contains an invalid regular expression, so invalid patterns are caught at startup rather than silently swallowed on every AST node visit.
func (*BaseMatcher) FindSources ¶
func (m *BaseMatcher) FindSources(root *sitter.Node, src []byte) []Match
FindSources finds all input sources in the AST
func (*BaseMatcher) Language ¶
func (m *BaseMatcher) Language() string
Language returns the language this matcher handles
type Definition ¶
// Definition represents a user input source definition.
//
// Pattern, ExcludePattern and KeyExtractor hold regular expressions as
// strings; per the NewBaseMatcher documentation they are compiled when a
// BaseMatcher is constructed, and an invalid expression causes a panic there.
type Definition struct {
	Name               string       // e.g., "$_GET", "req.body"
	Pattern            string       // Regex pattern to match
	Language           string       // Target language
	Labels             []InputLabel // Categories
	Description        string       // Human-readable description
	NodeTypes          []string     // Tree-sitter node types to match
	KeyExtractor       string       // Regex to extract key (e.g., from $_GET['key'])
	ExcludeParentTypes []string     // Skip match if node's parent is one of these AST types
	ExcludePattern     string       // Regex pattern - skip match if node text matches this
	// contains filtered or unexported fields
}
Definition represents a user input source definition
type FrameworkDetector ¶
// FrameworkDetector defines file path indicators for framework detection.
// Both fields are serialized to JSON under the tag names shown.
type FrameworkDetector struct {
	// Framework is the framework's name.
	// NOTE(review): presumably the same name passed to GetFrameworkDetector - confirm.
	Framework string `json:"framework"`
	// Indicators lists file paths that indicate this framework.
	Indicators []string `json:"indicators"` // File paths that indicate this framework
}
FrameworkDetector defines file path indicators for framework detection
func GetFrameworkDetector ¶
func GetFrameworkDetector(framework string) *FrameworkDetector
GetFrameworkDetector returns a framework detector by name
type FrameworkPattern ¶
// FrameworkPattern defines a framework-specific input source pattern.
// This is the centralized definition - all language analyzers should use it.
//
// The struct is JSON-serializable; fields tagged omitempty are left out of the
// encoded form when they hold their zero value.
type FrameworkPattern struct {
	// Identification and description.
	ID          string `json:"id"`
	Framework   string `json:"framework"`
	Language    string `json:"language"`
	Name        string `json:"name"`
	Description string `json:"description"`
	// Pattern matching (regex strings)
	ClassPattern    string `json:"class_pattern,omitempty"`    // Regex for class names
	MethodPattern   string `json:"method_pattern,omitempty"`   // Regex for method names
	PropertyPattern string `json:"property_pattern,omitempty"` // Regex for property names
	AccessPattern   string `json:"access_pattern,omitempty"`   // How data is accessed: "array", "method", "property", "superglobal"
	// Source mapping
	SourceType SourceType `json:"source_type"`
	SourceKey  string     `json:"source_key,omitempty"` // How to extract the key
	// Flow information (for carrier tracking)
	CarrierClass    string   `json:"carrier_class,omitempty"`
	CarrierProperty string   `json:"carrier_property,omitempty"`
	PopulatedBy     string   `json:"populated_by,omitempty"`   // Method that populates the carrier
	PopulatedFrom   []string `json:"populated_from,omitempty"` // Original sources (e.g., ["$_GET", "$_POST"])
	// Tags for categorization
	Tags []string `json:"tags,omitempty"`
}
FrameworkPattern defines a framework-specific input source pattern. This is the centralized definition - all language analyzers should use this.
type FrameworkPatternRegistry ¶
type FrameworkPatternRegistry struct {
// contains filtered or unexported fields
}
FrameworkPatternRegistry manages framework patterns for a language
func NewFrameworkPatternRegistry ¶
func NewFrameworkPatternRegistry(language string) *FrameworkPatternRegistry
NewFrameworkPatternRegistry creates a new registry for a language
func (*FrameworkPatternRegistry) Count ¶
func (r *FrameworkPatternRegistry) Count() int
Count returns the total number of registered patterns
func (*FrameworkPatternRegistry) GetAll ¶
func (r *FrameworkPatternRegistry) GetAll() []*FrameworkPattern
GetAll returns all registered patterns
func (*FrameworkPatternRegistry) GetByFramework ¶
func (r *FrameworkPatternRegistry) GetByFramework(framework string) []*FrameworkPattern
GetByFramework returns all patterns for a specific framework
func (*FrameworkPatternRegistry) GetByID ¶
func (r *FrameworkPatternRegistry) GetByID(id string) *FrameworkPattern
GetByID returns a pattern by its ID
func (*FrameworkPatternRegistry) GetFrameworks ¶
func (r *FrameworkPatternRegistry) GetFrameworks() []string
GetFrameworks returns a list of all registered frameworks
func (*FrameworkPatternRegistry) Register ¶
func (r *FrameworkPatternRegistry) Register(pattern *FrameworkPattern)
Register adds a pattern to the registry
func (*FrameworkPatternRegistry) RegisterAll ¶
func (r *FrameworkPatternRegistry) RegisterAll(patterns []*FrameworkPattern)
RegisterAll adds multiple patterns to the registry
type InputLabel ¶
// InputLabel represents the category of user input. It is a type alias, so
// common.InputLabel and core.InputLabel are the same type; the canonical
// definition lives in pkg/sources/core.
type InputLabel = core.InputLabel
InputLabel represents the category of user input. This is a type alias — the canonical definition lives in pkg/sources/core.
type InputPattern ¶
// InputPattern represents a compiled regex pattern for input detection,
// paired with a display name for the source it detects.
type InputPattern struct {
	// Regex is the compiled pattern.
	Regex *regexp.Regexp
	Name  string // Human-readable name for matched input source
}
InputPattern represents a compiled regex pattern for input detection
type Match ¶
// Match represents a matched source in code.
//
// Note that SourceType here is a plain string holding the source's name
// (such as "$_GET"), not a value of the SourceType alias declared in this
// package.
type Match struct {
	SourceType string // e.g., "$_GET", "req.body"
	Key        string // e.g., "username" in $_GET['username']
	Variable   string // Variable name if assigned
	// Position of the match in the source file.
	// NOTE(review): whether Line/Column are 0- or 1-based, and whether
	// EndLine/EndColumn are inclusive, is not visible here - confirm against
	// FindSources.
	Line      int
	Column    int
	EndLine   int
	EndColumn int
	// Snippet is presumably the matched source text - TODO confirm.
	Snippet string
	// Labels are the input categories attached to the match.
	Labels []InputLabel
}
Match represents a matched source in code
type SourceType ¶
// SourceType represents the semantic type of an input source. It is a type
// alias, so common.SourceType and core.SourceType are the same type; the
// canonical definition lives in pkg/sources/core.
type SourceType = core.SourceType
SourceType represents the semantic type of an input source. This is a type alias — the canonical definition lives in pkg/sources/core.