Documentation
¶
Index ¶
- Variables
- func CaptureURLComponents(reqURL, baseURL *url.URL, wordSet map[string]struct{}, ...)
- func DeobfuscateEmail(raw string) string
- func ExtractAttrs(doc *goquery.Document, addWords func(string), addContext func(string))
- func ExtractBodyText(doc *goquery.Document, addWords func(string), addContext func(string))
- func ExtractComments(doc *goquery.Document, addWords func(string))
- func ExtractEmails(doc *goquery.Document, addEmail func(string))
- func ExtractEmailsFromText(text string) []string
- func ExtractFromCSS(body []byte, wordSet map[string]struct{})
- func ExtractFromJS(body []byte, wordSet map[string]struct{}) []string
- func ExtractFromJSON(body []byte, wordSet map[string]struct{})
- func ExtractFromXML(body []byte, wordSet map[string]struct{})
- func ExtractMediaMetadata(body []byte, wordSet map[string]struct{})
- func ExtractOfficeMetadata(body []byte, mu *sync.Mutex, metaSet map[string]struct{}, verbose bool, ...)
- func ExtractPDFMetadata(body []byte, mu *sync.Mutex, metaSet map[string]struct{}, verbose bool, ...)
- func ExtractSubtitles(body []byte, wordSet map[string]struct{})
- func ExtractTextContent(body []byte, wordSet map[string]struct{}, pageContexts *[]string)
- func ExtractTitle(doc *goquery.Document) string
- func FollowLinks(doc *goquery.Document, visit func(string))
- func FollowResources(doc *goquery.Document, visit func(string))
- func MatchType(contentType, reqURL string, types []string, exts []string) bool
- func ParseByExtension(ext string, body []byte, wordSet map[string]struct{}, pageContexts *[]string)
- type ContentParser
- type Resource
- type SecretFinding
- type SecretScanner
Constants ¶
This section is empty.
Variables ¶
View Source
// Extension matchers for document files. Both patterns are case-insensitive
// and anchored to the end of the input, so anything following the extension
// (for example a query string) prevents a match.
var (
	// DocumentExts matches input ending in .docx, .xlsx, .pptx, .dotx,
	// .potx or .ppsx.
	DocumentExts = regexp.MustCompile(`(?i)\.(docx|xlsx|pptx|dotx|potx|ppsx)$`)

	// PdfExt matches input ending in .pdf.
	PdfExt = regexp.MustCompile(`(?i)\.pdf$`)
)
View Source
var (
	// EmailRe finds email-like tokens anywhere in a string, ignoring case:
	// a local part made of letters, digits and "._%+-", an "@", a host made
	// of letters, digits, dots and hyphens, and a final all-letter label.
	//
	// The final label may be 2 to 63 letters long (63 is the DNS label
	// limit). A tighter cap truncates addresses on longer TLDs; for
	// example a cap of 4 yields "user@example.tech" for
	// "user@example.technology".
	//
	// The pattern is unanchored and has no word boundaries, so callers that
	// need strict validation must check the match themselves.
	EmailRe = regexp.MustCompile(`(?i)[a-z0-9._%+\-]+@[a-z0-9.\-]+\.[a-z]{2,63}`)
)
View Source
// Parsers is the table of available content parsers. Each entry carries, in
// positional order: a list of content-type strings, a list of file
// extensions (with leading dot), and the extraction function to run on a
// response body.
//
// NOTE(review): how the two string lists are compared against a response is
// not visible from this declaration; presumably via MatchType — confirm.
var Parsers = []ContentParser{
	// JavaScript.
	{
		[]string{"javascript", "ecmascript"},
		[]string{".js", ".mjs"},
		// ExtractFromJS also returns a []string; the wrapper discards it so
		// the entry has the same function shape as the others.
		func(body []byte, wordSet map[string]struct{}) { ExtractFromJS(body, wordSet) },
	},
	// XML-family documents and feeds.
	{
		[]string{"xml", "svg"},
		[]string{".xml", ".svg", ".rss", ".atom", ".sitemap"},
		ExtractFromXML,
	},
	// JSON and web manifests.
	{
		[]string{"json"},
		[]string{".json", ".webmanifest"},
		ExtractFromJSON,
	},
	// Stylesheets.
	{
		[]string{"css"},
		[]string{".css"},
		ExtractFromCSS,
	},
	// Subtitle tracks.
	{
		[]string{"text/vtt", "subrip"},
		[]string{".vtt", ".srt"},
		ExtractSubtitles,
	},
	// Audio and video containers.
	{
		[]string{"audio", "video"},
		[]string{".mp3", ".mp4", ".ogg", ".flac", ".wav", ".m4a", ".webm"},
		ExtractMediaMetadata,
	},
}
View Source
// Resources lists element selectors together with the attribute named in
// each selector. Every entry is a positional pair: the selector string
// first, then the attribute name.
//
// NOTE(review): presumably consumed by FollowResources to collect resource
// URLs from a document — confirm against its implementation.
var Resources = []Resource{
	// Scripts and linked resources.
	{"script[src]", "src"},
	{"link[href]", "href"},

	// Images and embedded frames.
	{"img[src]", "src"},
	{"iframe[src]", "src"},

	// Media elements and their sources/tracks.
	{"source[src]", "src"},
	{"video[src]", "src"},
	{"audio[src]", "src"},
	{"track[src]", "src"},
}
View Source
// WordAttrs is an ordered list of HTML attribute names.
//
// NOTE(review): presumably the attributes whose values ExtractAttrs reads —
// confirm against its implementation.
var WordAttrs = []string{
	// Descriptive and accessibility attributes.
	"alt",
	"title",
	"placeholder",
	"aria-label",
	"aria-description",

	// Custom data-* attributes.
	"data-title",
	"data-name",
	"data-label",
	"data-value",

	// Generic value-carrying attributes.
	"content",
	"value",
	"label",
	"summary",
}
Functions ¶
func CaptureURLComponents ¶
func DeobfuscateEmail ¶
func ExtractAttrs ¶
func ExtractBodyText ¶
func ExtractComments ¶
func ExtractEmails ¶
func ExtractEmailsFromText ¶
func ExtractFromCSS ¶
func ExtractFromJS ¶
func ExtractFromJSON ¶
func ExtractFromXML ¶
func ExtractMediaMetadata ¶
func ExtractOfficeMetadata ¶
func ExtractPDFMetadata ¶
func ExtractSubtitles ¶
func ExtractTextContent ¶
func ExtractTitle ¶
func FollowLinks ¶
func FollowResources ¶
Types ¶
type ContentParser ¶
type SecretFinding ¶ added in v0.5.0
type SecretScanner ¶ added in v0.5.0
// SecretScanner provides the Scan method, which checks data for secrets.
// Obtain an instance with NewSecretScanner.
type SecretScanner struct {
// contains filtered or unexported fields
}
func NewSecretScanner ¶ added in v0.5.0
func NewSecretScanner() *SecretScanner
func (*SecretScanner) Scan ¶ added in v0.5.0
func (s *SecretScanner) Scan(data []byte, source string) []SecretFinding
Scan checks data for secrets using trufflehog detectors. It accepts a []byte directly to avoid unnecessary string/byte conversions.
Click to show internal directories.
Click to hide internal directories.