common

package
v0.1.3 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: May 27, 2026 License: GPL-3.0 Imports: 5 Imported by: 0

Documentation

Overview

Package common - framework_patterns.go provides framework pattern definitions. All framework-specific patterns should be defined using these types.

Package common - regex_patterns.go provides pre-compiled regex patterns for input detection. These patterns are used across semantic analyzers for consistent pattern matching.

Package common - source_types.go provides centralized SourceType definitions. SourceType is now a type alias pointing to the canonical definition in pkg/sources/core.

Index

Constants

View Source
// Source* constants re-export the SourceType values declared in package core
// so that code referring to them through this package continues to work
// (backward compatibility). Each name here is defined as exactly the
// same-named core constant; the trailing comment on each line describes the
// kind of input that value stands for.
const (
	SourceHTTPGet     = core.SourceHTTPGet     // Query string parameters
	SourceHTTPPost    = core.SourceHTTPPost    // POST form data
	SourceHTTPBody    = core.SourceHTTPBody    // Raw request body
	SourceHTTPJSON    = core.SourceHTTPJSON    // JSON request body
	SourceHTTPHeader  = core.SourceHTTPHeader  // HTTP headers
	SourceHTTPCookie  = core.SourceHTTPCookie  // Cookies
	SourceHTTPPath    = core.SourceHTTPPath    // URL path parameters
	SourceHTTPFile    = core.SourceHTTPFile    // Uploaded files ($_FILES)
	SourceHTTPRequest = core.SourceHTTPRequest // Combined GET/POST ($_REQUEST)
	SourceSession     = core.SourceSession     // Session data ($_SESSION)
	SourceCLIArg      = core.SourceCLIArg      // Command line arguments
	SourceEnvVar      = core.SourceEnvVar      // Environment variables
	SourceStdin       = core.SourceStdin       // Standard input
	SourceFile        = core.SourceFile        // File reads
	SourceDatabase    = core.SourceDatabase    // Database query results
	SourceNetwork     = core.SourceNetwork     // Network/socket reads
	SourceUserInput   = core.SourceUserInput   // Generic user input
	SourceUnknown     = core.SourceUnknown     // Unknown source type
)

Re-export SourceType constants from core for backward compatibility.

View Source
// Label* constants re-export the InputLabel values declared in package core
// so that code referring to them through this package continues to work
// (backward compatibility). Each name here is defined as exactly the
// same-named core constant.
const (
	LabelHTTPGet     = core.LabelHTTPGet
	LabelHTTPPost    = core.LabelHTTPPost
	LabelHTTPCookie  = core.LabelHTTPCookie
	LabelHTTPHeader  = core.LabelHTTPHeader
	LabelHTTPBody    = core.LabelHTTPBody
	LabelCLI         = core.LabelCLI
	LabelEnvironment = core.LabelEnvironment
	LabelFile        = core.LabelFile
	LabelDatabase    = core.LabelDatabase
	LabelNetwork     = core.LabelNetwork
	LabelUserInput   = core.LabelUserInput
)

Re-export InputLabel constants from core for backward compatibility.

Variables

View Source
// Pre-compiled PHP superglobal patterns. They are compiled once at package
// initialisation and are case-sensitive.
var (
	// SuperglobalPattern matches PHP superglobal array access with either a
	// literal or a variable key,
	// e.g., $_GET['key'], $_POST["key"], $_REQUEST[$var].
	//
	// Submatch 1 is the superglobal name without the "$_" prefix (GET, POST,
	// ...). Submatch 2 is the key: a literal key is returned without its
	// quotes, a variable key keeps its leading "$" so callers can tell a
	// dynamic key from a literal one.
	SuperglobalPattern = regexp.MustCompile(`\$_(GET|POST|COOKIE|REQUEST|SERVER|FILES|SESSION|ENV)\[['"]?(\$?[\w\-]+)['"]?\]`)

	// SuperglobalForeachPattern matches PHP foreach loops in key => value form,
	// e.g., foreach ($_GET as $key => $value).
	//
	// Submatch 1 is the iterated variable including "$_", submatch 2 the key
	// variable name and submatch 3 the value variable name (both without "$").
	// The iterated variable is matched as `\$_\w+`, so any "$_"-prefixed
	// variable is accepted, not only the names listed in SuperglobalPattern.
	// The value-only form (foreach ($_GET as $value)) is not matched.
	SuperglobalForeachPattern = regexp.MustCompile(`foreach\s*\(\s*(\$_\w+)\s+as\s+\$(\w+)\s*=>\s*\$(\w+)\s*\)`)

	// SuperglobalSimplePattern matches just the superglobal name,
	// e.g., $_GET, $_POST (with or without a following array access).
	// Submatch 1 is the name without the "$_" prefix.
	SuperglobalSimplePattern = regexp.MustCompile(`\$_(GET|POST|COOKIE|REQUEST|SERVER|FILES|SESSION|ENV)`)
)

Pre-compiled PHP superglobal patterns

View Source
// Pre-compiled PHP method/property patterns. In every pattern submatch 1 is
// the receiver variable name (without "$") and submatch 2 is the method or
// property name.
var (
	// MethodCallPattern matches object method calls up to the opening paren,
	// e.g., $obj->method(, $request->input(
	MethodCallPattern = regexp.MustCompile(`\$(\w+)->(\w+)\s*\(`)

	// MethodCallWithArgsPattern matches method calls whose first argument is a
	// quoted string, optionally followed by further arguments,
	// e.g., $var->method('arg'), $var->method("arg").
	// Submatch 3 is the first argument without its quotes.
	MethodCallWithArgsPattern = regexp.MustCompile(`\$(\w+)->(\w+)\s*\(\s*['"]([^'"]*)['"]\s*(?:,\s*[^)]+)?\s*\)`)

	// PropertyArrayPattern matches property with array access,
	// e.g., $obj->data['key'], $request->query["param"].
	// Submatch 3 is the key without its quotes.
	PropertyArrayPattern = regexp.MustCompile(`\$(\w+)->(\w+)\[['"]?([\w\-]+)['"]?\]`)

	// SimplePropertyPattern matches simple property access (no array, no
	// method call), e.g., $obj->data, $request->body.
	//
	// The property name must be followed by end of input or by a character
	// that is neither a word character, "[" nor "(". Word characters have to
	// be excluded as well: RE2 has no lookahead, and without that exclusion
	// `\w+` simply gives back its last character, so "$obj->method(" would
	// match as property "metho". When a terminating character is present it is
	// part of the overall match (submatch 0) but not of submatch 2.
	SimplePropertyPattern = regexp.MustCompile(`\$(\w+)->(\w+)(?:[^\w\[\(]|$)`)
)

Pre-compiled method/property patterns

View Source
// Pre-compiled, framework-independent PHP input patterns. All three are
// case-insensitive ((?i)) and unanchored, so they match anywhere in the text
// they are applied to.
var (
	// InputMethodPattern matches universal PHP input method patterns
	// e.g., ->get_input(, ->getInput(, ->input(
	// The method name may carry an optional "get"/"get_" prefix and an
	// optional trailing "s"; the match ends at the opening paren. The pattern
	// has no capturing groups.
	InputMethodPattern = regexp.MustCompile(`(?i)->(?:get_?)?(?:input|var|variable|query_?params?|parsed_?body|cookie_?params?|server_?params?|uploaded_?files?|headers?|all|post|cookie|param)s?\s*\(`)

	// InputPropertyPattern matches universal PHP input property patterns
	// e.g., ->input[, ->data[, ->request[
	// The property name may carry an optional trailing "s" and must be
	// followed directly by "[". The pattern has no capturing groups.
	InputPropertyPattern = regexp.MustCompile(`(?i)->(?:input|request|params?|query|cookies?|headers?|body|data|args?|post|get|files?|server|attributes?|payload)s?\[`)

	// InputObjectPattern matches objects that typically carry user input
	// NOTE: This is GENERIC - framework-specific objects are detected separately
	// e.g., $request, $input, $ctx, $context
	// Submatch 1 is the matched word. There is no "$" or word-boundary
	// requirement, so the words also match inside longer identifiers (for
	// example "req" inside "required"); the literal call forms "getRequest()"
	// and "getApplication()" are accepted as well.
	InputObjectPattern = regexp.MustCompile(`(?i)(request|input|req|params?|http|ctx|context|getRequest\(\)|getApplication\(\))`)
)

Pre-compiled input method patterns. NOTE: These are UNIVERSAL patterns that work across any framework. Framework-specific patterns (MyBB, phpBB, WordPress, etc.) should be defined in pkg/sources/{language}/{framework}.go.

View Source
// Pre-compiled context-dependent method patterns (may or may not indicate
// input). Both are case-insensitive ((?i)), unanchored, and have no capturing
// groups.
var (
	// ContextDependentMethodPattern matches methods that may be input getters
	// but need context to determine whether they really read input,
	// e.g., ->val(, ->getInt(, ->get_text(, ->raw_val(, ->check(
	// An optional "get"/"get_" prefix is accepted, but one of the listed
	// names is required after it: a bare ->get( is NOT matched by this
	// pattern.
	ContextDependentMethodPattern = regexp.MustCompile(`(?i)->(?:get_?)?(?:val|text|int|bool|array|raw_?val|check)\s*\(`)

	// ExcludeMethodPattern matches methods that look like input but aren't
	// e.g., ->getData( is often a generic getter, not input
	// The full list is getData, getBody, getContent, fetch, find, load and
	// read, each followed by optional whitespace and an opening paren.
	ExcludeMethodPattern = regexp.MustCompile(`(?i)->(?:getData|getBody|getContent|fetch|find|load|read)\s*\(`)
)

Pre-compiled context-dependent method patterns (may or may not indicate input)

View Source
// Pre-compiled JavaScript/TypeScript patterns. Both are case-sensitive.
var (
	// JSRequestPattern matches common JS request object access
	// e.g., req.body, req.query, req.params, request.body
	// The object name must be req, request or ctx and the property one of
	// body, query, params, cookies, headers, file or files. The pattern has no
	// capturing groups and no leading word boundary, so it also matches when
	// the object name is the tail of a longer identifier.
	JSRequestPattern = regexp.MustCompile(`(?:req|request|ctx)\.(?:body|query|params|cookies|headers|files?)`)

	// JSPropertyAccessPattern matches JS property access that may be input
	// e.g., ctx.request.body, event.body
	// It matches ANY dotted access of two or three segments, so callers must
	// decide from the submatches whether it is input: submatch 1 is the first
	// segment, submatch 2 the second, and submatch 3 the optional third
	// segment (empty when absent).
	JSPropertyAccessPattern = regexp.MustCompile(`\b(\w+)\.(\w+)(?:\.(\w+))?`)
)

Pre-compiled JavaScript/TypeScript patterns

View Source
// Pre-compiled Python patterns. All are case-sensitive and unanchored.
var (
	// PythonFlaskPattern matches Flask request access
	// e.g., request.form, request.args, request.json
	// The object must be spelled "request". The pattern has no capturing
	// groups.
	PythonFlaskPattern = regexp.MustCompile(`request\.(?:form|args|json|data|values|files|cookies|headers)`)

	// PythonDjangoPattern matches Django request access
	// e.g., request.GET, request.POST, request.COOKIES
	// "request.data" appears in both this pattern and PythonFlaskPattern, so
	// that text matches both. The pattern has no capturing groups.
	PythonDjangoPattern = regexp.MustCompile(`request\.(?:GET|POST|COOKIES|META|FILES|body|data)`)

	// PythonArgparsePattern matches argparse parsed args access
	// e.g., args.username, args.verbose
	// Submatch 1 is the attribute name. There is no leading word boundary, so
	// any identifier ending in "args" (for example kwargs.x) matches too.
	PythonArgparsePattern = regexp.MustCompile(`args\.(\w+)`)
)

Pre-compiled Python patterns

View Source
// Pre-compiled Go patterns. They recognise request access by the
// conventional receiver names only (r/req/request for *http.Request, c for a
// framework context). Each pattern starts with a word boundary so that the
// receiver must be a whole identifier: without it, "writer.Header()",
// "exec.Query(" or "rpc.Param(" would be reported as request input because
// they merely end in "r." or "c.". None of the patterns has capturing groups.
var (
	// GoRequestPattern matches Go http.Request access
	// e.g., r.URL.Query(), r.FormValue(, r.Header.Get(
	GoRequestPattern = regexp.MustCompile(`\b(?:r|req|request)\.(?:URL\.Query|FormValue|Header|Cookie|Body|Form|PostForm)`)

	// GoGinPattern matches Gin framework access
	// e.g., c.Query(, c.Param(, c.PostForm(
	GoGinPattern = regexp.MustCompile(`\bc\.(?:Query|Param|PostForm|GetHeader|Cookie|ShouldBind)`)

	// GoEchoPattern matches Echo framework access
	// e.g., c.QueryParam(, c.Param(, c.FormValue(
	GoEchoPattern = regexp.MustCompile(`\bc\.(?:QueryParam|Param|FormValue|Request)`)
)

Pre-compiled Go patterns

View Source
// Pre-compiled Java patterns. Both are case-sensitive, unanchored, and have
// no capturing groups.
var (
	// JavaServletPattern matches HttpServletRequest access
	// e.g., request.getParameter(, request.getHeader(, request.getCookies(
	// The receiver must be named request, req or httpRequest. The servlet API
	// exposes cookies through getCookies(), so the plural form is accepted;
	// the singular getCookie( is still matched for custom wrappers.
	JavaServletPattern = regexp.MustCompile(`(?:request|req|httpRequest)\.get(?:Parameter|Header|Cookies?|Attribute|Session)\s*\(`)

	// JavaSpringAnnotationPattern matches Spring annotation parameters
	// e.g., @RequestParam, @PathVariable, @RequestBody
	JavaSpringAnnotationPattern = regexp.MustCompile(`@(?:RequestParam|PathVariable|RequestBody|RequestHeader|CookieValue|ModelAttribute|RequestPart|MatrixVariable)`)
)

Pre-compiled Java patterns

View Source
// Pre-compiled C/C++ patterns. All are case-sensitive and unanchored.
var (
	// CArgvPattern matches C main function argv access
	// e.g., argv[1], argv[i]
	// Submatch 1 is the index expression, which must be a single run of word
	// characters (a number or an identifier); whitespace around it is allowed.
	CArgvPattern = regexp.MustCompile(`argv\s*\[\s*(\d+|\w+)\s*\]`)

	// CEnvPattern matches C environment access
	// e.g., getenv("PATH"), envp[0]
	// It matches "getenv(", "envp[" or "environ[" (optional whitespace before
	// the bracket) and has no capturing groups.
	CEnvPattern = regexp.MustCompile(`(?:getenv\s*\(|envp\s*\[|environ\s*\[)`)

	// CStdinPattern matches C stdin reads
	// e.g., scanf(, gets(, fgets(
	// Only the function name and opening paren are checked: the stream
	// argument is not inspected, so fgets( matches for any stream. There is
	// no leading word boundary, so names that end in a listed function (for
	// example sscanf( or fscanf() match as well.
	CStdinPattern = regexp.MustCompile(`(?:scanf|gets|fgets|getchar|getc|getline)\s*\(`)
)

Pre-compiled C/C++ patterns

View Source
// AllSourceTypes lists all valid source types for iteration/validation.
// It is re-exported from package core, where the list itself is defined.
var AllSourceTypes = core.AllSourceTypes

AllSourceTypes lists all valid source types for iteration/validation.

View Source
// LanguageKeywords provides language-specific keyword lists.
//
// The map key is a lower-case language identifier ("php", "python",
// "javascript", "typescript", "go", "java", "c", "cpp", "c_sharp", "ruby",
// "rust"). Each value lists words that are specific to that language:
// keywords, literals and built-in names.
var LanguageKeywords = map[string][]string{
	// PHP
	"php": {
		"isset", "empty", "unset",
		"echo", "print",
		"is_null", "is_string", "is_array",
		"is_int", "is_bool", "is_float",
		"array", "list", "global", "static",
	},

	// Python
	"python": {
		"None", "True", "False",
		"and", "or", "not", "is", "in",
		"lambda", "pass", "with", "as",
		"global", "nonlocal",
		"assert", "raise", "except", "from",
	},

	// JavaScript
	"javascript": {
		"undefined", "null", "true", "false",
		"typeof", "instanceof",
		"arguments", "debugger",
		"delete", "void", "with",
		"NaN", "Infinity",
	},

	// TypeScript: the JavaScript list followed by type-level keywords.
	"typescript": {
		"undefined", "null", "true", "false",
		"typeof", "instanceof",
		"arguments", "debugger",
		"delete", "void", "with",
		"NaN", "Infinity",
		"keyof", "readonly", "infer", "never",
	},

	// Go
	"go": {
		"nil", "true", "false", "iota",
		"make", "new", "append", "copy",
		"delete", "len", "cap",
		"panic", "recover",
		"defer", "go", "select", "chan",
		"range", "fallthrough", "goto",
		"map", "struct",
	},

	// Java
	"java": {
		"null", "true", "false",
		"instanceof", "extends", "implements", "throws",
		"native", "synchronized",
		"volatile", "transient", "strictfp",
		"assert",
	},

	// C
	"c": {
		"NULL", "sizeof",
		"typedef", "extern", "register", "volatile",
		"inline", "restrict",
		"auto", "goto",
	},

	// C++
	"cpp": {
		"NULL", "nullptr", "sizeof",
		"typedef", "extern", "register", "volatile",
		"inline", "restrict",
		"auto", "goto",
		"template", "typename",
		"virtual", "override", "delete",
		"constexpr", "noexcept", "decltype",
		"static_cast", "dynamic_cast",
	},

	// C#
	"c_sharp": {
		"null", "true", "false",
		"typeof", "sizeof", "nameof",
		"checked", "unchecked",
		"lock", "fixed", "stackalloc",
		"internal", "sealed",
		"virtual", "override", "readonly",
		"ref", "out", "in", "params",
		"where", "default",
	},

	// Ruby
	"ruby": {
		"nil", "true", "false",
		"self", "super", "defined?",
		"begin", "end",
		"rescue", "ensure", "raise",
		"alias", "undef",
		"redo", "retry", "yield",
	},

	// Rust
	"rust": {
		"true", "false",
		"self", "Self", "super", "crate",
		"move", "ref", "box", "dyn",
		"where", "unsafe", "extern",
		"mod", "pub", "priv",
		"loop", "match",
	},
}

LanguageKeywords provides language-specific keyword lists

View Source
// PHPInputPatterns contains pre-compiled patterns for PHP input access.
// Each entry pairs a case-sensitive, unanchored regex with the human-readable
// name to report for a match. The entries are generic; framework-specific
// patterns are registered elsewhere.
var PHPInputPatterns = []InputPattern{

	// Superglobal array access: the name followed directly by "[".
	{Regex: regexp.MustCompile(`\$_GET\[`), Name: "$_GET"},
	{Regex: regexp.MustCompile(`\$_POST\[`), Name: "$_POST"},
	{Regex: regexp.MustCompile(`\$_REQUEST\[`), Name: "$_REQUEST"},
	{Regex: regexp.MustCompile(`\$_COOKIE\[`), Name: "$_COOKIE"},
	{Regex: regexp.MustCompile(`\$_FILES\[`), Name: "$_FILES"},
	{Regex: regexp.MustCompile(`\$_SERVER\[`), Name: "$_SERVER"},

	// Accessors on a variable literally named $request. Note that "query" is
	// matched as array access (->query[), the others as method calls.
	{Regex: regexp.MustCompile(`\$request->input\(`), Name: "$request->input()"},
	{Regex: regexp.MustCompile(`\$request->get\(`), Name: "$request->get()"},
	{Regex: regexp.MustCompile(`\$request->post\(`), Name: "$request->post()"},
	{Regex: regexp.MustCompile(`\$request->query\[`), Name: "$request->query"},
	{Regex: regexp.MustCompile(`\$request->all\(`), Name: "$request->all()"},

	// Getter calls on any receiver (these are the accessor names used by the
	// PSR-7 ServerRequestInterface).
	{Regex: regexp.MustCompile(`->getQueryParams\(`), Name: "->getQueryParams()"},
	{Regex: regexp.MustCompile(`->getParsedBody\(`), Name: "->getParsedBody()"},
	{Regex: regexp.MustCompile(`->getCookieParams\(`), Name: "->getCookieParams()"},
}

PHPInputPatterns contains pre-compiled patterns for PHP input access. It is used for detecting input sources in expressions without a carrier map.

NOTE: These are GENERIC patterns. Framework-specific patterns (MyBB, WordPress, etc.) are defined in pkg/sources/php/{framework}.go and registered separately.

View Source
// UniversalKeywords contains keywords to filter from variable extraction.
// These are common across most languages and should not be treated as
// variable names. Membership is case-sensitive; every listed word maps to
// true and any other word yields the zero value false.
var UniversalKeywords = func() map[string]bool {
	words := []string{
		// Control flow.
		"if", "else", "elseif", "elif",
		"switch", "case", "default",
		"for", "while", "do", "foreach",
		"break", "continue", "return",
		"try", "catch", "finally", "throw",

		// Literals.
		"true", "false", "null", "nil", "undefined",
		"True", "False", "None",

		// Word operators.
		"and", "or", "not", "is", "in",

		// Declarations.
		"function", "func", "def", "fn",
		"var", "let", "const", "mut",
		"class", "interface", "struct", "enum",
		"type", "impl", "trait",

		// Modifiers.
		"public", "private", "protected",
		"static", "final", "abstract",
		"async", "await", "yield",

		// Object and type operators.
		"new", "this", "self", "super",
		"instanceof", "typeof",

		// PHP language constructs.
		"isset", "empty", "unset",
		"echo", "print",

		// Modules and namespaces.
		"import", "export", "require", "include",
		"use", "namespace", "package", "module",
	}

	set := make(map[string]bool, len(words))
	for _, w := range words {
		set[w] = true
	}
	return set
}()

UniversalKeywords contains keywords to filter from variable extraction. These are common across most languages and should not be treated as variable names.

Functions

func GetAllFrameworkDetectors

func GetAllFrameworkDetectors() map[string]*FrameworkDetector

GetAllFrameworkDetectors returns all registered detectors

func GetOrCompileRegex

func GetOrCompileRegex(pattern string) (*regexp.Regexp, error)

GetOrCompileRegex returns a cached or newly compiled regex

func IsKeyword

func IsKeyword(word string) bool

IsKeyword checks if a word is a universal keyword

func IsKeywordForLanguage

func IsKeywordForLanguage(word, lang string) bool

IsKeywordForLanguage checks if a word is a keyword for a specific language

func IsValidSourceType

func IsValidSourceType(s string) bool

IsValidSourceType checks if a string is a valid SourceType.

func RegisterFrameworkDetector

func RegisterFrameworkDetector(detector *FrameworkDetector)

RegisterFrameworkDetector registers a framework detector

Types

type BaseMatcher

type BaseMatcher struct {
	// contains filtered or unexported fields
}

BaseMatcher provides common functionality for source matching

func NewBaseMatcher

func NewBaseMatcher(language string, sources []Definition) *BaseMatcher

NewBaseMatcher creates a new base matcher and pre-compiles all regex patterns. It panics if any Pattern, ExcludePattern, or KeyExtractor field contains an invalid regular expression, so invalid patterns are caught at startup rather than silently swallowed on every AST node visit.

func (*BaseMatcher) FindSources

func (m *BaseMatcher) FindSources(root *sitter.Node, src []byte) []Match

FindSources finds all input sources in the AST

func (*BaseMatcher) Language

func (m *BaseMatcher) Language() string

Language returns the language this matcher handles

type Definition

type Definition struct {
	Name               string       // e.g., "$_GET", "req.body"
	Pattern            string       // Regex pattern to match
	Language           string       // Target language
	Labels             []InputLabel // Categories
	Description        string       // Human-readable description
	NodeTypes          []string     // Tree-sitter node types to match
	KeyExtractor       string       // Regex to extract key (e.g., from $_GET['key'])
	ExcludeParentTypes []string     // Skip match if node's parent is one of these AST types
	ExcludePattern     string       // Regex pattern - skip match if node text matches this
	// contains filtered or unexported fields
}

Definition represents a user input source definition

type FrameworkDetector

// FrameworkDetector defines file path indicators for framework detection.
// Both fields are serialised to JSON under the lower-case names given in the
// struct tags.
type FrameworkDetector struct {
	Framework  string   `json:"framework"`
	Indicators []string `json:"indicators"` // File paths that indicate this framework
}

FrameworkDetector defines file path indicators for framework detection

func GetFrameworkDetector

func GetFrameworkDetector(framework string) *FrameworkDetector

GetFrameworkDetector returns a framework detector by name

type FrameworkPattern

// FrameworkPattern defines a framework-specific input source pattern.
// This is the centralized definition - all language analyzers should use it.
//
// Every field is serialised to JSON; the identifying fields and SourceType
// are always emitted, all remaining fields are omitted when empty.
type FrameworkPattern struct {
	ID          string `json:"id"`
	Framework   string `json:"framework"`
	Language    string `json:"language"`
	Name        string `json:"name"`
	Description string `json:"description"`

	// Pattern matching (regex strings)
	// NOTE: AccessPattern is the exception in this group - per its comment it
	// holds one of a fixed set of access kinds rather than a regex.
	ClassPattern    string `json:"class_pattern,omitempty"`    // Regex for class names
	MethodPattern   string `json:"method_pattern,omitempty"`   // Regex for method names
	PropertyPattern string `json:"property_pattern,omitempty"` // Regex for property names
	AccessPattern   string `json:"access_pattern,omitempty"`   // How data is accessed: "array", "method", "property", "superglobal"

	// Source mapping
	SourceType SourceType `json:"source_type"`
	SourceKey  string     `json:"source_key,omitempty"` // How to extract the key

	// Flow information (for carrier tracking)
	CarrierClass    string   `json:"carrier_class,omitempty"`
	CarrierProperty string   `json:"carrier_property,omitempty"`
	PopulatedBy     string   `json:"populated_by,omitempty"`   // Method that populates the carrier
	PopulatedFrom   []string `json:"populated_from,omitempty"` // Original sources (e.g., ["$_GET", "$_POST"])

	// Tags for categorization
	Tags []string `json:"tags,omitempty"`
}

FrameworkPattern defines a framework-specific input source pattern. This is the centralized definition - all language analyzers should use this.

type FrameworkPatternRegistry

type FrameworkPatternRegistry struct {
	// contains filtered or unexported fields
}

FrameworkPatternRegistry manages framework patterns for a language

func NewFrameworkPatternRegistry

func NewFrameworkPatternRegistry(language string) *FrameworkPatternRegistry

NewFrameworkPatternRegistry creates a new registry for a language

func (*FrameworkPatternRegistry) Count

func (r *FrameworkPatternRegistry) Count() int

Count returns the total number of registered patterns

func (*FrameworkPatternRegistry) GetAll

GetAll returns all registered patterns

func (*FrameworkPatternRegistry) GetByFramework

func (r *FrameworkPatternRegistry) GetByFramework(framework string) []*FrameworkPattern

GetByFramework returns all patterns for a specific framework

func (*FrameworkPatternRegistry) GetByID

GetByID returns a pattern by its ID

func (*FrameworkPatternRegistry) GetFrameworks

func (r *FrameworkPatternRegistry) GetFrameworks() []string

GetFrameworks returns a list of all registered frameworks

func (*FrameworkPatternRegistry) Register

func (r *FrameworkPatternRegistry) Register(pattern *FrameworkPattern)

Register adds a pattern to the registry

func (*FrameworkPatternRegistry) RegisterAll

func (r *FrameworkPatternRegistry) RegisterAll(patterns []*FrameworkPattern)

RegisterAll adds multiple patterns to the registry

type InputLabel

// InputLabel represents the category of user input.
// This is a type alias - the canonical definition lives in pkg/sources/core.
type InputLabel = core.InputLabel

InputLabel represents the category of user input. This is a type alias — the canonical definition lives in pkg/sources/core.

type InputPattern

// InputPattern represents a compiled regex pattern for input detection,
// paired with the name to report when the pattern matches.
type InputPattern struct {
	Regex *regexp.Regexp
	Name  string // Human-readable name for matched input source
}

InputPattern represents a compiled regex pattern for input detection

type Match

// Match represents a matched source in code.
//
// Note that SourceType here is a plain string holding the source's name as
// written in code (see the examples), not a value of the SourceType type.
// NOTE(review): whether Line/Column/EndLine/EndColumn are 0- or 1-based is
// not visible from this declaration - confirm against FindSources.
type Match struct {
	SourceType string // e.g., "$_GET", "req.body"
	Key        string // e.g., "username" in $_GET['username']
	Variable   string // Variable name if assigned
	Line       int
	Column     int
	EndLine    int
	EndColumn  int
	Snippet    string
	Labels     []InputLabel
}

Match represents a matched source in code

type Matcher

// Matcher is the interface for language-specific source detection.
type Matcher interface {
	// Language returns the language this matcher handles.
	Language() string
	// FindSources returns the input sources found in the syntax tree rooted
	// at root; src is passed alongside the tree (presumably the source bytes
	// the tree was parsed from - confirm against the implementation).
	FindSources(root *sitter.Node, src []byte) []Match
}

Matcher interface for language-specific source detection

type SourceType

// SourceType represents the semantic type of an input source.
// This is a type alias - the canonical definition lives in pkg/sources/core.
type SourceType = core.SourceType

SourceType represents the semantic type of an input source. This is a type alias — the canonical definition lives in pkg/sources/core.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL