Documentation
¶
Overview ¶
-- builder.go --
-- canonicalizer.go --
-- semantic_firewall/fingerprinter.go --
Index ¶
- Constants
- Variables
- func AnalyzeSCEV(info *LoopInfo)
- func BuildSSAFromPackages(initialPkgs []*packages.Package) (*ssa.Program, *ssa.Package, error)
- func CalculateEntropy(data []byte) float64
- func CalculateEntropyNormalized(data []byte) float64
- func CheckIRPattern(t *testing.T, ir string, pattern string)
- func ComputeTopologySimilarityExported(topo *FunctionTopology, sig Signature) float64
- func EntropyDistance(e1, e2 float64) float64
- func EntropyMatch(e1, e2, tolerance float64) bool
- func FormatEntropyKeyExported(entropy float64, id string) string
- func GenerateFuzzyHash(t *FunctionTopology) string
- func GenerateTopologyHashExported(topo *FunctionTopology) string
- func GetFunctionNames(results []FingerprintResult) []string
- func GetHardenedEnv() []string
- func MapSimilarity(a, b map[string]int) float64
- func MatchCallsExported(topo *FunctionTopology, required []string) (score float64, matched, missing []string)
- func MatchFunctionsByTopology(oldResults, newResults []FingerprintResult, threshold float64) (matched []TopologyMatch, addedFuncs []FingerprintResult, ...)
- func ReleaseCanonicalizer(c *Canonicalizer)
- func SetTopologyLimits(maxLen, maxTotal int)
- func SetupTestEnv(t *testing.T, dirPrefix string) (string, func())
- func ShortFuncName(fullName string) string
- func TopologyFingerprint(t *FunctionTopology) string
- func TopologySimilarity(a, b *FunctionTopology) float64
- type Canonicalizer
- type ControlFlowHints
- type DatabaseMetadata
- type EntropyClass
- type EntropyProfile
- type FingerprintResult
- func CompileAndGetFunction(t *testing.T, src, funcName string) *FingerprintResult
- func FindResult(results []FingerprintResult, name string) *FingerprintResult
- func FingerprintPackages(initialPkgs []*packages.Package, policy LiteralPolicy, strictMode bool) ([]FingerprintResult, error)
- func FingerprintSource(filename string, src string, policy LiteralPolicy) ([]FingerprintResult, error)
- func FingerprintSourceAdvanced(filename string, src string, policy LiteralPolicy, strictMode bool) ([]FingerprintResult, error)
- func GenerateFingerprint(fn *ssa.Function, policy LiteralPolicy, strictMode bool) FingerprintResult
- type FunctionTopology
- type IVType
- type IdentifyingFeatures
- type InductionVariable
- type LiteralPolicy
- type Loop
- type LoopInfo
- type MatchDetails
- type PebbleScanner
- func (s *PebbleScanner) AddSignature(sig Signature) error
- func (s *PebbleScanner) AddSignatures(sigs []Signature) error
- func (s *PebbleScanner) Checkpoint() error
- func (s *PebbleScanner) Close() error
- func (s *PebbleScanner) Compact() error
- func (s *PebbleScanner) CountSignatures() (int, error)
- func (s *PebbleScanner) DeleteMetadata(key string) error
- func (s *PebbleScanner) DeleteSignature(id string) error
- func (s *PebbleScanner) ExportToJSON(jsonPath string) error
- func (s *PebbleScanner) GetAllMetadata() (*DatabaseMetadata, error)
- func (s *PebbleScanner) GetMetadata(key string) (string, error)
- func (s *PebbleScanner) GetSignature(id string) (*Signature, error)
- func (s *PebbleScanner) GetSignatureByTopology(topoHash string) (*Signature, error)
- func (s *PebbleScanner) GetSnapshot() *pebble.Snapshot
- func (s *PebbleScanner) InitializeMetadata(version, description string) error
- func (s *PebbleScanner) ListSignatureIDs() ([]string, error)
- func (s *PebbleScanner) MarkFalsePositive(id string, notes string) error
- func (s *PebbleScanner) MigrateFromJSON(jsonPath string) (int, error)
- func (s *PebbleScanner) RebuildIndexes() error
- func (s *PebbleScanner) ScanBatch(topologies map[string]*FunctionTopology) map[string][]ScanResult
- func (s *PebbleScanner) ScanByEntropyRange(minEntropy, maxEntropy float64) ([]Signature, error)
- func (s *PebbleScanner) ScanTopology(topo *FunctionTopology, funcName string) ([]ScanResult, error)
- func (s *PebbleScanner) ScanTopologyExact(topo *FunctionTopology, funcName string) (*ScanResult, error)
- func (s *PebbleScanner) ScanTopologyWithSnapshot(snap *pebble.Snapshot, topo *FunctionTopology, funcName string) ([]ScanResult, error)
- func (s *PebbleScanner) SetAllMetadata(meta *DatabaseMetadata) error
- func (s *PebbleScanner) SetEntropyTolerance(tolerance float64)
- func (s *PebbleScanner) SetMetadata(key, value string) error
- func (s *PebbleScanner) SetThreshold(threshold float64)
- func (s *PebbleScanner) Stats() (*PebbleScannerStats, error)
- func (s *PebbleScanner) TouchLastUpdated() error
- type PebbleScannerOptions
- type PebbleScannerStats
- type Renamer
- type SCEV
- type SCEVAddRec
- func (s *SCEVAddRec) EvaluateAt(k *big.Int) *big.Int
- func (s *SCEVAddRec) IsLoopInvariant(loop *Loop) bool
- func (s *SCEVAddRec) Name() string
- func (s *SCEVAddRec) Parent() *ssa.Function
- func (s *SCEVAddRec) Pos() token.Pos
- func (s *SCEVAddRec) Referrers() *[]ssa.Instruction
- func (s *SCEVAddRec) String() string
- func (s *SCEVAddRec) StringWithRenamer(r Renamer) string
- func (s *SCEVAddRec) Type() types.Type
- type SCEVConstant
- func (s *SCEVConstant) EvaluateAt(k *big.Int) *big.Int
- func (s *SCEVConstant) IsLoopInvariant(loop *Loop) bool
- func (s *SCEVConstant) Name() string
- func (s *SCEVConstant) Parent() *ssa.Function
- func (s *SCEVConstant) Pos() token.Pos
- func (s *SCEVConstant) Referrers() *[]ssa.Instruction
- func (s *SCEVConstant) String() string
- func (s *SCEVConstant) StringWithRenamer(r Renamer) string
- func (s *SCEVConstant) Type() types.Type
- type SCEVGenericExpr
- func (s *SCEVGenericExpr) EvaluateAt(k *big.Int) *big.Int
- func (s *SCEVGenericExpr) IsLoopInvariant(loop *Loop) bool
- func (s *SCEVGenericExpr) Name() string
- func (s *SCEVGenericExpr) Parent() *ssa.Function
- func (s *SCEVGenericExpr) Pos() token.Pos
- func (s *SCEVGenericExpr) Referrers() *[]ssa.Instruction
- func (s *SCEVGenericExpr) String() string
- func (s *SCEVGenericExpr) StringWithRenamer(r Renamer) string
- func (s *SCEVGenericExpr) Type() types.Type
- type SCEVUnknown
- func (s *SCEVUnknown) EvaluateAt(k *big.Int) *big.Int
- func (s *SCEVUnknown) IsLoopInvariant(loop *Loop) bool
- func (s *SCEVUnknown) Name() string
- func (s *SCEVUnknown) Parent() *ssa.Function
- func (s *SCEVUnknown) Pos() token.Pos
- func (s *SCEVUnknown) Referrers() *[]ssa.Instruction
- func (s *SCEVUnknown) String() string
- func (s *SCEVUnknown) StringWithRenamer(r Renamer) string
- func (s *SCEVUnknown) Type() types.Type
- type ScanResult
- type Scanner
- func (s *Scanner) AddSignature(sig Signature)
- func (s *Scanner) GetDatabase() *SignatureDatabase
- func (s *Scanner) LoadDatabase(path string) error
- func (s *Scanner) SaveDatabase(path string) error
- func (s *Scanner) ScanTopology(topo *FunctionTopology, funcName string) []ScanResult
- func (s *Scanner) SetThreshold(threshold float64)
- type Signature
- type SignatureDatabase
- type SignatureMetadata
- type TopologyMatch
- type Zipper
- type ZipperArtifacts
Constants ¶
const MaxCandidates = 100
Limits comparison candidates per fingerprint bucket. Prevents algorithmic DoS.
const MaxFunctionBlocks = 5000
const MaxRenamerDepth = 20
Limits recursion depth to 20 levels. Twenty levels allow 2^20 (~1M), which is safe and sufficient for code analysis.
const MaxSCEVDepth = 100
Limits recursion depth in SCEV analysis to prevent stack overflow.
Variables ¶
var ( // Hardening: Prevent Memory DoS from massive string literals // These limits can be adjusted via SetTopologyLimits. MaxStringLiteralLen = 4096 // 4KB limit per string MaxTotalStringBytes = 1024 * 64 // 64KB limit per function )
var DefaultLiteralPolicy = LiteralPolicy{ AbstractControlFlowComparisons: true, KeepSmallIntegerIndices: true, KeepReturnStatusValues: true, KeepStringLiterals: false, SmallIntMin: -16, SmallIntMax: 16, AbstractOtherTypes: true, }
Standard policy for fingerprinting. Preserves small integers used for indexing and status codes while masking magic numbers and large constants.
var KeepAllLiteralsPolicy = LiteralPolicy{ AbstractControlFlowComparisons: false, KeepSmallIntegerIndices: true, KeepReturnStatusValues: true, KeepStringLiterals: true, SmallIntMin: math.MinInt64, SmallIntMax: math.MaxInt64, AbstractOtherTypes: false, }
Designed for testing or exact matching by disabling most abstractions and expanding the "small" integer range to the full int64 spectrum.
Functions ¶
func AnalyzeSCEV ¶
func AnalyzeSCEV(info *LoopInfo)
func BuildSSAFromPackages ¶
Constructs Static Single Assignment form from loaded Go packages. Returns the complete program and the target package for analysis.
func CalculateEntropy ¶
Returns the Shannon entropy of a byte slice. Result ranges from 0.0 (all bytes identical, completely predictable) to 8.0 (maximum randomness). High entropy (>7.0) often indicates packed/encrypted code. Normal code typically has entropy between 4.5 and 6.5.
func CalculateEntropyNormalized ¶
Returns entropy normalized to 0.0-1.0 range. Useful for direct comparison and threshold checks.
func CheckIRPattern ¶
CheckIRPattern checks IR against a pattern using regex, abstracting register names. Exported for use in external test packages.
func ComputeTopologySimilarityExported ¶
func ComputeTopologySimilarityExported(topo *FunctionTopology, sig Signature) float64
ComputeTopologySimilarityExported exports the computeTopologySimilarity function for testing.
func EntropyDistance ¶
Calculates the absolute difference between two entropy values. Used for fuzzy matching: two functions with similar entropy are more likely related.
func EntropyMatch ¶
Returns true if two entropy values are within the given tolerance. Default tolerance of 0.5 is recommended for malware family matching.
func FormatEntropyKeyExported ¶
FormatEntropyKeyExported exports the formatEntropyKey function for testing.
func GenerateFuzzyHash ¶
func GenerateFuzzyHash(t *FunctionTopology) string
GenerateFuzzyHash creates a short representation of the function structure.
Optimization Rationale: Explicitly fold ParamCount (P) and ReturnCount (R) into the hash. While Blocks/Loops/Branches define internal logic complexity, the signature (P/R) provides "external" structural stability. This dramatically increases the entropy of the hash, splitting massive buckets of small/generic functions (e.g., getters, setters, wrappers) into distinct groups, ensuring the O(N*K) matching algorithm keeps K small and approaches O(N).
func GenerateTopologyHashExported ¶
func GenerateTopologyHashExported(topo *FunctionTopology) string
GenerateTopologyHashExported exports the generateTopologyHash function for testing.
func GetFunctionNames ¶
func GetFunctionNames(results []FingerprintResult) []string
GetFunctionNames extracts function names from results for easier verification. Exported for use in external test packages.
func GetHardenedEnv ¶ added in v2.2.0
func GetHardenedEnv() []string
Returns a slice of environment variables configured for secure analysis.
func MapSimilarity ¶ added in v2.2.0
MapSimilarity calculates the similarity between two frequency maps.
func MatchCallsExported ¶
func MatchCallsExported(topo *FunctionTopology, required []string) (score float64, matched, missing []string)
MatchCallsExported exports the matchCalls function for testing.
func MatchFunctionsByTopology ¶
func MatchFunctionsByTopology(oldResults, newResults []FingerprintResult, threshold float64) ( matched []TopologyMatch, addedFuncs []FingerprintResult, removedFuncs []FingerprintResult, )
MatchFunctionsByTopology correlates functions across versions using structural analysis.
func ReleaseCanonicalizer ¶
func ReleaseCanonicalizer(c *Canonicalizer)
func SetTopologyLimits ¶ added in v2.2.1
func SetTopologyLimits(maxLen, maxTotal int)
SetTopologyLimits adjusts the memory safeguards for string processing.
func SetupTestEnv ¶
SetupTestEnv creates an isolated test environment for the packages loader. Exported for use in external test packages.
func ShortFuncName ¶
ShortFuncName returns the short function name without package prefix. Exported for use in external test packages.
func TopologyFingerprint ¶
func TopologyFingerprint(t *FunctionTopology) string
func TopologySimilarity ¶
func TopologySimilarity(a, b *FunctionTopology) float64
TopologySimilarity calculates the similarity between two function topologies.
Types ¶
type Canonicalizer ¶
type Canonicalizer struct {
Policy LiteralPolicy
StrictMode bool
// contains filtered or unexported fields
}
Transforms an SSA function into a deterministic string representation.
func AcquireCanonicalizer ¶
func AcquireCanonicalizer(policy LiteralPolicy) *Canonicalizer
func NewCanonicalizer ¶
func NewCanonicalizer(policy LiteralPolicy) *Canonicalizer
func (*Canonicalizer) ApplyVirtualControlFlowFromState ¶
func (c *Canonicalizer) ApplyVirtualControlFlowFromState(swappedBlocks map[*ssa.BasicBlock]bool, virtualBinOps map[*ssa.BinOp]token.Token)
func (*Canonicalizer) CanonicalizeFunction ¶
func (c *Canonicalizer) CanonicalizeFunction(fn *ssa.Function) string
type ControlFlowHints ¶
type ControlFlowHints struct {
HasInfiniteLoop bool `json:"has_infinite_loop,omitempty"`
HasReconnectLogic bool `json:"has_reconnect_logic,omitempty"`
}
Captures control flow patterns.
type DatabaseMetadata ¶ added in v2.2.0
type DatabaseMetadata struct {
Version string `json:"version"`
Description string `json:"description"`
CreatedAt time.Time `json:"created_at"`
LastUpdatedAt time.Time `json:"last_updated_at"`
SignatureCount int `json:"signature_count"`
SourceHash string `json:"source_hash"`
Custom map[string]string `json:"custom,omitempty"`
}
DatabaseMetadata contains information about the signature database.
type EntropyClass ¶
type EntropyClass int
Categorizes entropy levels for quick analysis.
const ( EntropyLow EntropyClass = iota // < 4.0: Simple/sparse code EntropyNormal // 4.0-6.5: Typical compiled code EntropyHigh // 6.5-7.5: Potentially obfuscated EntropyPacked // > 7.5: Likely packed/encrypted )
func ClassifyEntropy ¶
func ClassifyEntropy(entropy float64) EntropyClass
Determines the entropy class from a raw entropy value.
func (EntropyClass) String ¶
func (c EntropyClass) String() string
type EntropyProfile ¶
type EntropyProfile struct {
// Overall entropy of the function body
Overall float64
// Entropy of string literals within the function
StringLiteralEntropy float64
// Entropy classification
Classification EntropyClass
}
Captures entropy characteristics for malware analysis.
func CalculateEntropyProfile ¶
func CalculateEntropyProfile(bodyBytes []byte, stringLiterals []string) EntropyProfile
Builds a complete entropy profile for analysis.
type FingerprintResult ¶
type FingerprintResult struct {
FunctionName string
Fingerprint string
CanonicalIR string
Pos token.Pos
Line int
Filename string
// contains filtered or unexported fields
}
func CompileAndGetFunction ¶
func CompileAndGetFunction(t *testing.T, src, funcName string) *FingerprintResult
CompileAndGetFunction is a helper to compile source and get a named SSA function. Exported for use in external test packages.
func FindResult ¶
func FindResult(results []FingerprintResult, name string) *FingerprintResult
FindResult searches for a FingerprintResult by function name. It supports both exact matches and suffix matches (e.g., "functionName" matches "pkg.functionName"). Exported for use in external test packages.
func FingerprintPackages ¶
func FingerprintPackages(initialPkgs []*packages.Package, policy LiteralPolicy, strictMode bool) ([]FingerprintResult, error)
func FingerprintSource ¶
func FingerprintSource(filename string, src string, policy LiteralPolicy) ([]FingerprintResult, error)
func FingerprintSourceAdvanced ¶
func FingerprintSourceAdvanced(filename string, src string, policy LiteralPolicy, strictMode bool) ([]FingerprintResult, error)
func GenerateFingerprint ¶
func GenerateFingerprint(fn *ssa.Function, policy LiteralPolicy, strictMode bool) FingerprintResult
func (FingerprintResult) GetSSAFunction ¶
func (r FingerprintResult) GetSSAFunction() *ssa.Function
type FunctionTopology ¶
type FunctionTopology struct {
FuzzyHash string
// Basic metrics
ParamCount int
ReturnCount int
BlockCount int
InstrCount int
LoopCount int
BranchCount int // if statements
PhiCount int
// Complexity metrics
CyclomaticComplexity int
// Call profile: map of "package.func" or "method" -> count
CallSignatures map[string]int
// Granular instruction tracking
InstrCounts map[string]int
// Type signature (normalized)
ParamTypes []string
ReturnTypes []string
// Control flow features
HasDefer bool
HasRecover bool
HasPanic bool
HasGo bool
HasSelect bool
HasRange bool
// Operator profile
BinOpCounts map[string]int
UnOpCounts map[string]int
// String literal hashes (for behavioral matching)
StringLiterals []string
// Entropy analysis for obfuscation detection
EntropyScore float64
EntropyProfile EntropyProfile
// contains filtered or unexported fields
}
FunctionTopology captures the structural "shape" of a function independent of names.
func ExtractTopology ¶
func ExtractTopology(fn *ssa.Function) *FunctionTopology
ExtractTopology analyzes an SSA function and extracts its structural features.
type IdentifyingFeatures ¶
type IdentifyingFeatures struct {
RequiredCalls []string `json:"required_calls,omitempty"`
OptionalCalls []string `json:"optional_calls,omitempty"`
StringPatterns []string `json:"string_patterns,omitempty"`
ControlFlow *ControlFlowHints `json:"control_flow,omitempty"`
}
Captures behavioral markers for detection.
type InductionVariable ¶
type InductionVariable struct {
Phi *ssa.Phi
Type IVType
Start SCEV // Value at iteration 0
Step SCEV // Update stride
}
Describes a detected induction variable (IV). Reference: Section 3.2 Classification Taxonomy.
type LiteralPolicy ¶
type LiteralPolicy struct {
AbstractControlFlowComparisons bool
KeepSmallIntegerIndices bool
KeepReturnStatusValues bool
KeepStringLiterals bool
SmallIntMin int64
SmallIntMax int64
AbstractOtherTypes bool
}
Defines the configurable strategy for determining which literal values should be abstracted into placeholders during canonicalization. Allows fine-grained control over integer abstraction in different contexts.
func (*LiteralPolicy) ShouldAbstract ¶
func (p *LiteralPolicy) ShouldAbstract(c *ssa.Const, usageContext ssa.Instruction) bool
Decides whether a given constant should be replaced by a generic placeholder. Analyzes the constant's type, value, and immediate usage context in the SSA graph.
type Loop ¶
type Loop struct {
Header *ssa.BasicBlock
Latch *ssa.BasicBlock // Primary source of the backedge
// Blocks contains all basic blocks within the loop body.
Blocks map[*ssa.BasicBlock]bool
// Exits contains blocks inside the loop that have successors outside.
Exits []*ssa.BasicBlock
// Hierarchy
Parent *Loop
Children []*Loop
// Semantic Analysis (populated in scev.go)
Inductions map[*ssa.Phi]*InductionVariable
TripCount SCEV // Symbolic expression
// Memoization cache for SCEV analysis to prevent exponential complexity.
SCEVCache map[ssa.Value]SCEV
}
Represents a natural loop in the SSA graph. Reference: Section 2.3 Natural Loops.
type LoopInfo ¶
type LoopInfo struct {
Function *ssa.Function
Loops []*Loop // Top-level loops (roots of the hierarchy)
// Map from Header block to Loop object for O(1) lookup
LoopMap map[*ssa.BasicBlock]*Loop
}
Summarizes loop analysis for a single function.
func DetectLoops ¶
Reconstructs the loop hierarchy using dominance relations. Reference: Section 2.3.1 Algorithm: Detecting Natural Loops.
type MatchDetails ¶
type MatchDetails struct {
TopologyMatch bool `json:"topology_match"`
EntropyMatch bool `json:"entropy_match"`
CallsMatched []string `json:"calls_matched"`
CallsMissing []string `json:"calls_missing"`
StringsMatched []string `json:"strings_matched"`
TopologySimilarity float64 `json:"topology_similarity"`
EntropyDistance float64 `json:"entropy_distance"`
}
Provides granular information about the match.
type PebbleScanner ¶ added in v2.2.0
type PebbleScanner struct {
// contains filtered or unexported fields
}
PebbleScanner performs semantic malware detection using CockroachDB's Pebble for persistent storage. Pebble's LSM tree architecture provides:
- No CGO dependency (pure Go)
- No page-level locking (high concurrency)
- Optimized for read-heavy, high-throughput workloads (CI/CD pipeline scale)
- Built-in compression (LZ4/Snappy/ZSTD)
Supports O(1) exact topology matching and O(M) fuzzy entropy range scans.
func NewPebbleScanner ¶ added in v2.2.0
func NewPebbleScanner(dbPath string, opts PebbleScannerOptions) (*PebbleScanner, error)
NewPebbleScanner opens or creates a Pebble-backed signature database. The database directory will be created if it doesn't exist.
func (*PebbleScanner) AddSignature ¶ added in v2.2.0
func (s *PebbleScanner) AddSignature(sig Signature) error
Atomically saves a signature and updates all indexes. Safe for concurrent use. Uses Pebble's WriteBatch for atomic writes. OPTIMIZATION: Writes Gob (faster storage) and Packed Index (faster lookups).
func (*PebbleScanner) AddSignatures ¶ added in v2.2.0
func (s *PebbleScanner) AddSignatures(sigs []Signature) error
Atomically adds multiple signatures in a single batch.
func (*PebbleScanner) Checkpoint ¶ added in v2.2.0
func (s *PebbleScanner) Checkpoint() error
Creates a durable snapshot of the database.
func (*PebbleScanner) Close ¶ added in v2.2.0
func (s *PebbleScanner) Close() error
Close flushes all pending writes and closes the database. Always call this when done to prevent data loss.
func (*PebbleScanner) Compact ¶ added in v2.2.0
func (s *PebbleScanner) Compact() error
Triggers a manual compaction to reclaim space.
func (*PebbleScanner) CountSignatures ¶ added in v2.2.0
func (s *PebbleScanner) CountSignatures() (int, error)
Returns the number of signatures in the database.
func (*PebbleScanner) DeleteMetadata ¶ added in v2.2.0
func (s *PebbleScanner) DeleteMetadata(key string) error
func (*PebbleScanner) DeleteSignature ¶ added in v2.2.0
func (s *PebbleScanner) DeleteSignature(id string) error
Removes a signature and its index entries atomically.
func (*PebbleScanner) ExportToJSON ¶ added in v2.2.0
func (s *PebbleScanner) ExportToJSON(jsonPath string) error
ExportToJSON exports all signatures to a JSON file. NOTE: Reads Gob (internal), Writes JSON (external).
func (*PebbleScanner) GetAllMetadata ¶ added in v2.2.0
func (s *PebbleScanner) GetAllMetadata() (*DatabaseMetadata, error)
func (*PebbleScanner) GetMetadata ¶ added in v2.2.0
func (s *PebbleScanner) GetMetadata(key string) (string, error)
func (*PebbleScanner) GetSignature ¶ added in v2.2.0
func (s *PebbleScanner) GetSignature(id string) (*Signature, error)
Retrieves a signature by ID.
func (*PebbleScanner) GetSignatureByTopology ¶ added in v2.2.0
func (s *PebbleScanner) GetSignatureByTopology(topoHash string) (*Signature, error)
Retrieves the first signature matching a topology hash.
func (*PebbleScanner) GetSnapshot ¶ added in v2.2.0
func (s *PebbleScanner) GetSnapshot() *pebble.Snapshot
Returns a read-only snapshot of the database at a point in time.
func (*PebbleScanner) InitializeMetadata ¶ added in v2.2.0
func (s *PebbleScanner) InitializeMetadata(version, description string) error
func (*PebbleScanner) ListSignatureIDs ¶ added in v2.2.0
func (s *PebbleScanner) ListSignatureIDs() ([]string, error)
ListSignatureIDs returns all signature IDs.
func (*PebbleScanner) MarkFalsePositive ¶ added in v2.2.0
func (s *PebbleScanner) MarkFalsePositive(id string, notes string) error
Updates a signature to record that it caused a false positive.
func (*PebbleScanner) MigrateFromJSON ¶ added in v2.2.0
func (s *PebbleScanner) MigrateFromJSON(jsonPath string) (int, error)
MigrateFromJSON imports signatures directly from a JSON file. FIX: Uses streaming decoder to prevent OOM on large datasets. NOTE: Reads JSON (external), Writes Gob (internal).
func (*PebbleScanner) RebuildIndexes ¶ added in v2.2.0
func (s *PebbleScanner) RebuildIndexes() error
Recreates all indexes from master signature records. IMPORTANT: Reads legacy or Gob records and writes Packed index values.
func (*PebbleScanner) ScanBatch ¶ added in v2.2.0
func (s *PebbleScanner) ScanBatch(topologies map[string]*FunctionTopology) map[string][]ScanResult
Scans multiple topologies efficiently using parallel lookups.
func (*PebbleScanner) ScanByEntropyRange ¶ added in v2.2.0
func (s *PebbleScanner) ScanByEntropyRange(minEntropy, maxEntropy float64) ([]Signature, error)
Finds signatures within an entropy score range.
func (*PebbleScanner) ScanTopology ¶ added in v2.2.0
func (s *PebbleScanner) ScanTopology(topo *FunctionTopology, funcName string) ([]ScanResult, error)
Checks a function topology against the signature database using two phases:
- Phase A (O(K)): Exact topology hash lookup (iterating collisions)
- Phase B (O(K)): Fuzzy bucket index lookup (LSH lite)
OPTIMIZED: 1. Reads packed index values to perform lightweight entropy filtering. 2. Skips expensive database reads and decoding for candidates outside entropy tolerance.
func (*PebbleScanner) ScanTopologyExact ¶ added in v2.2.0
func (s *PebbleScanner) ScanTopologyExact(topo *FunctionTopology, funcName string) (*ScanResult, error)
Performs only exact topology hash matching (fastest).
func (*PebbleScanner) ScanTopologyWithSnapshot ¶ added in v2.2.0
func (s *PebbleScanner) ScanTopologyWithSnapshot(snap *pebble.Snapshot, topo *FunctionTopology, funcName string) ([]ScanResult, error)
Scans using a specific snapshot (for consistent reads).
func (*PebbleScanner) SetAllMetadata ¶ added in v2.2.0
func (s *PebbleScanner) SetAllMetadata(meta *DatabaseMetadata) error
func (*PebbleScanner) SetEntropyTolerance ¶ added in v2.2.0
func (s *PebbleScanner) SetEntropyTolerance(tolerance float64)
Updates the entropy fuzzy match window.
func (*PebbleScanner) SetMetadata ¶ added in v2.2.0
func (s *PebbleScanner) SetMetadata(key, value string) error
func (*PebbleScanner) SetThreshold ¶ added in v2.2.0
func (s *PebbleScanner) SetThreshold(threshold float64)
Updates the minimum confidence threshold for alerts.
func (*PebbleScanner) Stats ¶ added in v2.2.0
func (s *PebbleScanner) Stats() (*PebbleScannerStats, error)
Returns database statistics.
func (*PebbleScanner) TouchLastUpdated ¶ added in v2.2.0
func (s *PebbleScanner) TouchLastUpdated() error
type PebbleScannerOptions ¶ added in v2.2.0
type PebbleScannerOptions struct {
MatchThreshold float64 // Minimum confidence for alerts (default: 0.75)
EntropyTolerance float64 // Entropy fuzzy match window (default: 0.5)
ReadOnly bool // Open DB in read-only mode for scanning only
CacheSize int64 // Block cache size in bytes (default: 8MB)
}
PebbleScannerOptions configures the PebbleScanner initialization.
func DefaultPebbleScannerOptions ¶ added in v2.2.0
func DefaultPebbleScannerOptions() PebbleScannerOptions
DefaultPebbleScannerOptions returns sensible defaults for production use.
type PebbleScannerStats ¶ added in v2.2.0
type PebbleScannerStats struct {
SignatureCount int
TopoIndexCount int
FuzzyIndexCount int
EntropyIndexCount int
DiskSpaceUsed int64
}
PebbleScannerStats contains database statistics.
type SCEVAddRec ¶
func (*SCEVAddRec) EvaluateAt ¶
func (s *SCEVAddRec) EvaluateAt(k *big.Int) *big.Int
func (*SCEVAddRec) IsLoopInvariant ¶
func (s *SCEVAddRec) IsLoopInvariant(loop *Loop) bool
func (*SCEVAddRec) Name ¶
func (s *SCEVAddRec) Name() string
func (*SCEVAddRec) Parent ¶
func (s *SCEVAddRec) Parent() *ssa.Function
func (*SCEVAddRec) Pos ¶
func (s *SCEVAddRec) Pos() token.Pos
func (*SCEVAddRec) Referrers ¶
func (s *SCEVAddRec) Referrers() *[]ssa.Instruction
func (*SCEVAddRec) String ¶
func (s *SCEVAddRec) String() string
func (*SCEVAddRec) StringWithRenamer ¶
func (s *SCEVAddRec) StringWithRenamer(r Renamer) string
func (*SCEVAddRec) Type ¶
func (s *SCEVAddRec) Type() types.Type
type SCEVConstant ¶
func SCEVFromConst ¶
func SCEVFromConst(c *ssa.Const) *SCEVConstant
func (*SCEVConstant) EvaluateAt ¶
func (s *SCEVConstant) EvaluateAt(k *big.Int) *big.Int
func (*SCEVConstant) IsLoopInvariant ¶
func (s *SCEVConstant) IsLoopInvariant(loop *Loop) bool
func (*SCEVConstant) Name ¶
func (s *SCEVConstant) Name() string
func (*SCEVConstant) Parent ¶
func (s *SCEVConstant) Parent() *ssa.Function
func (*SCEVConstant) Pos ¶
func (s *SCEVConstant) Pos() token.Pos
func (*SCEVConstant) Referrers ¶
func (s *SCEVConstant) Referrers() *[]ssa.Instruction
func (*SCEVConstant) String ¶
func (s *SCEVConstant) String() string
func (*SCEVConstant) StringWithRenamer ¶
func (s *SCEVConstant) StringWithRenamer(r Renamer) string
func (*SCEVConstant) Type ¶
func (s *SCEVConstant) Type() types.Type
type SCEVGenericExpr ¶
func (*SCEVGenericExpr) EvaluateAt ¶
func (s *SCEVGenericExpr) EvaluateAt(k *big.Int) *big.Int
func (*SCEVGenericExpr) IsLoopInvariant ¶
func (s *SCEVGenericExpr) IsLoopInvariant(loop *Loop) bool
func (*SCEVGenericExpr) Name ¶
func (s *SCEVGenericExpr) Name() string
func (*SCEVGenericExpr) Parent ¶
func (s *SCEVGenericExpr) Parent() *ssa.Function
func (*SCEVGenericExpr) Pos ¶
func (s *SCEVGenericExpr) Pos() token.Pos
func (*SCEVGenericExpr) Referrers ¶
func (s *SCEVGenericExpr) Referrers() *[]ssa.Instruction
func (*SCEVGenericExpr) String ¶
func (s *SCEVGenericExpr) String() string
func (*SCEVGenericExpr) StringWithRenamer ¶
func (s *SCEVGenericExpr) StringWithRenamer(r Renamer) string
func (*SCEVGenericExpr) Type ¶
func (s *SCEVGenericExpr) Type() types.Type
type SCEVUnknown ¶
func (*SCEVUnknown) EvaluateAt ¶
func (s *SCEVUnknown) EvaluateAt(k *big.Int) *big.Int
func (*SCEVUnknown) IsLoopInvariant ¶
func (s *SCEVUnknown) IsLoopInvariant(loop *Loop) bool
func (*SCEVUnknown) Name ¶
func (s *SCEVUnknown) Name() string
func (*SCEVUnknown) Parent ¶
func (s *SCEVUnknown) Parent() *ssa.Function
func (*SCEVUnknown) Pos ¶
func (s *SCEVUnknown) Pos() token.Pos
func (*SCEVUnknown) Referrers ¶
func (s *SCEVUnknown) Referrers() *[]ssa.Instruction
func (*SCEVUnknown) String ¶
func (s *SCEVUnknown) String() string
func (*SCEVUnknown) StringWithRenamer ¶
func (s *SCEVUnknown) StringWithRenamer(r Renamer) string
func (*SCEVUnknown) Type ¶
func (s *SCEVUnknown) Type() types.Type
type ScanResult ¶
type ScanResult struct {
SignatureID string `json:"signature_id"`
SignatureName string `json:"signature_name"`
Severity string `json:"severity"`
MatchedFunction string `json:"matched_function"`
Confidence float64 `json:"confidence"` // 0.0 to 1.0
MatchDetails MatchDetails `json:"match_details"`
}
Represents a match between analyzed code and a signature.
type Scanner ¶
type Scanner struct {
// contains filtered or unexported fields
}
Performs semantic malware detection.
func (*Scanner) AddSignature ¶
Adds a new signature to the database.
func (*Scanner) GetDatabase ¶
func (s *Scanner) GetDatabase() *SignatureDatabase
Returns the current signature database.
func (*Scanner) LoadDatabase ¶
Loads signatures from a JSON file.
func (*Scanner) SaveDatabase ¶
Writes the signature database to a JSON file.
func (*Scanner) ScanTopology ¶
func (s *Scanner) ScanTopology(topo *FunctionTopology, funcName string) []ScanResult
Checks a function topology against all signatures. This is the "Hunter Phase" where we scan unknown code for matches.
func (*Scanner) SetThreshold ¶
Sets the minimum confidence threshold for alerts.
type Signature ¶
type Signature struct {
ID string `json:"id"`
Name string `json:"name"`
Description string `json:"description"`
Severity string `json:"severity"`
Category string `json:"category"`
TopologyHash string `json:"topology_hash"`
FuzzyHash string `json:"fuzzy_hash,omitempty"` // REMEDIATION: LSH bucket
EntropyScore float64 `json:"entropy_score"`
EntropyTolerance float64 `json:"entropy_tolerance"`
NodeCount int `json:"node_count"`
LoopDepth int `json:"loop_depth"`
IdentifyingFeatures IdentifyingFeatures `json:"identifying_features"`
Metadata SignatureMetadata `json:"metadata"`
}
Represents a single malware signature entry.
func IndexFunction ¶
func IndexFunction(topo *FunctionTopology, name, description, severity, category string) Signature
Generates a signature entry from a FunctionTopology. This is the "Lab Phase" where we analyze known malware to build the database.
type SignatureDatabase ¶
type SignatureDatabase struct {
Version string `json:"version"`
Description string `json:"description"`
Signatures []Signature `json:"signatures"`
}
Represents the malware signature database.
type SignatureMetadata ¶
type SignatureMetadata struct {
Author string `json:"author"`
Created string `json:"created"`
References []string `json:"references,omitempty"`
}
Contains provenance information.
type TopologyMatch ¶
type TopologyMatch struct {
OldResult FingerprintResult
NewResult FingerprintResult
OldTopology *FunctionTopology
NewTopology *FunctionTopology
Similarity float64
ByName bool
}
type Zipper ¶
type Zipper struct {
// contains filtered or unexported fields
}
Implements the semantic delta analysis algorithm. Walks the use-def chains of two functions in parallel, aligning equivalent nodes and isolating divergence.
func NewZipper ¶
func NewZipper(oldFn, newFn *ssa.Function, policy LiteralPolicy) (*Zipper, error)
Creates a new analysis session between two function versions.
func (*Zipper) ComputeDiff ¶
func (z *Zipper) ComputeDiff() (*ZipperArtifacts, error)
Runs through all four phases of the Zipper algorithm: semantic analysis, anchor alignment, forward propagation, and divergence isolation.
Source Files
¶
Directories
¶
| Path | Synopsis |
|---|---|
|
cmd
|
|
|
sfw
command
-- ./cmd/sfw/cmd_audit.go --
|
-- ./cmd/sfw/cmd_audit.go -- |
|
samples/clean
command
|
|
|
samples/shuffled
command
|
|
|
v1
command
|
|
|
v2
command
|