Documentation
¶
Index ¶
- func FragmentFirstLineAndLink(chunk *sources.Chunk) (int64, *int64, string)
- func FragmentLineOffset(chunk *sources.Chunk, result *detectors.Result) (int64, bool)
- func SetDetectorTimeout(timeout time.Duration)
- func SetResultLineNumber(chunk *sources.Chunk, result *detectors.Result, fragStart int64, mdLine *int64) bool
- func SupportsLineNumbers(sourceType sourcespb.SourceType) bool
- func UpdateLink(ctx context.Context, metadata *source_metadatapb.MetaData, link string, ...) error
- type Config
- type Engine
- func (e *Engine) AhoCorasickCoreKeywords() map[string]struct{}
- func (e *Engine) ChunksChan() <-chan *sources.Chunk
- func (e *Engine) DetectorAvgTime() map[string][]time.Duration
- func (e *Engine) Finish(ctx context.Context) error
- func (e *Engine) GetDetectorsMetrics() map[string]time.Duration
- func (e *Engine) GetMetrics() Metrics
- func (e *Engine) HasFoundResults() bool
- func (e *Engine) ResultsChan() chan detectors.ResultWithMetadata
- func (e *Engine) ScanChunk(chunk *sources.Chunk)
- func (e *Engine) ScanCircleCI(ctx context.Context, token string) (sources.JobProgressRef, error)
- func (e *Engine) ScanConfig(ctx context.Context, configuredSources ...sources.ConfiguredSource) ([]sources.JobProgressRef, error)
- func (e *Engine) ScanDocker(ctx context.Context, c sources.DockerConfig) (sources.JobProgressRef, error)
- func (e *Engine) ScanElasticsearch(ctx context.Context, c sources.ElasticsearchConfig) (sources.JobProgressRef, error)
- func (e *Engine) ScanFileSystem(ctx context.Context, c sources.FilesystemConfig) (sources.JobProgressRef, error)
- func (e *Engine) ScanGCS(ctx context.Context, c sources.GCSConfig) (sources.JobProgressRef, error)
- func (e *Engine) ScanGit(ctx context.Context, c sources.GitConfig) (sources.JobProgressRef, error)
- func (e *Engine) ScanGitHub(ctx context.Context, c sources.GithubConfig) (sources.JobProgressRef, error)
- func (e *Engine) ScanGitHubExperimental(ctx context.Context, c sources.GitHubExperimentalConfig) (sources.JobProgressRef, error)
- func (e *Engine) ScanGitLab(ctx context.Context, c sources.GitlabConfig) (sources.JobProgressRef, error)
- func (e *Engine) ScanHuggingface(ctx context.Context, c HuggingfaceConfig) (sources.JobProgressRef, error)
- func (e *Engine) ScanJenkins(ctx context.Context, jenkinsConfig JenkinsConfig) (sources.JobProgressRef, error)
- func (e *Engine) ScanJira(ctx context.Context, c sources.JiraConfig) (sources.JobProgressRef, error)
- func (e *Engine) ScanPostman(ctx context.Context, c sources.PostmanConfig) (sources.JobProgressRef, error)
- func (e *Engine) ScanS3(ctx context.Context, c sources.S3Config) (sources.JobProgressRef, error)
- func (e *Engine) ScanStdinInput(ctx context.Context, c sources.StdinConfig) (sources.JobProgressRef, error)
- func (e *Engine) ScanSyslog(ctx context.Context, c sources.SyslogConfig) (sources.JobProgressRef, error)
- func (e *Engine) ScanTravisCI(ctx context.Context, token string) (sources.JobProgressRef, error)
- func (e *Engine) Start(ctx context.Context)
- type HuggingfaceConfig
- type JenkinsConfig
- type Metrics
- type Printer
- type PrinterDispatcher
- type ResultsDispatcher
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func FragmentFirstLineAndLink ¶
FragmentFirstLineAndLink extracts the first line number and the link from the chunk metadata. It returns:
- The first line number of the fragment.
- A pointer to the line number, facilitating direct updates.
- The link associated with the fragment. This link may be updated in the chunk metadata if there's a change in the line number.
func FragmentLineOffset ¶
FragmentLineOffset sets the line number for a provided source chunk with a given detector result.
func SetDetectorTimeout ¶
SetDetectorTimeout sets the maximum timeout for each detector to scan a chunk.
func SetResultLineNumber ¶
func SetResultLineNumber(chunk *sources.Chunk, result *detectors.Result, fragStart int64, mdLine *int64) bool
SetResultLineNumber sets the line number in the provided result.
func SupportsLineNumbers ¶
func SupportsLineNumbers(sourceType sourcespb.SourceType) bool
SupportsLineNumbers determines if a line number can be found for a source type.
func UpdateLink ¶
func UpdateLink(ctx context.Context, metadata *source_metadatapb.MetaData, link string, line int64) error
UpdateLink updates the link of the provided source metadata.
Types ¶
type Config ¶
type Config struct {
// Number of concurrent scanner workers,
// also serves as a multiplier for other worker types (e.g., detector workers, notifier workers)
Concurrency int
ConfiguredSources []sources.ConfiguredSource
Decoders []decoders.Decoder
Detectors []detectors.Detector
DetectorVerificationOverrides map[config.DetectorID]bool
IncludeDetectors string
ExcludeDetectors string
CustomVerifiersOnly bool
VerifierEndpoints map[string]string
// Verify determines whether the scanner will verify candidate secrets.
Verify bool
// Defines which results will be notified by the engine
// (e.g., verified, unverified, unknown)
Results map[string]struct{}
LogFilteredUnverified bool
// FilterEntropy filters out unverified results using Shannon entropy.
FilterEntropy float64
// FilterUnverified sets the filterUnverified flag on the engine. If set to
// true, the engine will only return the first unverified result for a chunk for a detector.
FilterUnverified bool
ShouldScanEntireChunk bool
Dispatcher ResultsDispatcher
// SourceManager is used to manage the sources and units.
// TODO (ahrav): Expand this comment to describe the SourceManager's remaining responsibilities.
SourceManager *sources.SourceManager
// PrintAvgDetectorTime sets the printAvgDetectorTime flag on the engine. If set to
// true, the engine will print the average time taken by each detector.
// This option allows us to measure the time taken for each detector ONLY if
// the engine is configured to print the results.
// Calculating the average time taken by each detector is an expensive operation
// and should be avoided unless specified by the user.
PrintAvgDetectorTime bool
// VerificationOverlap determines whether the scanner will attempt to verify candidate secrets
// that have been detected by multiple detectors.
// By default, it is set to true.
VerificationOverlap bool
// DetectorWorkerMultiplier is used to determine the number of detector workers to spawn.
DetectorWorkerMultiplier int
// NotificationWorkerMultiplier is used to determine the number of notification workers to spawn.
NotificationWorkerMultiplier int
// VerificationOverlapWorkerMultiplier is used to determine the number of verification overlap workers to spawn.
VerificationOverlapWorkerMultiplier int
VerificationResultCache verificationcache.ResultCache
VerificationCacheMetrics verificationcache.MetricsReporter
}
Config used to configure the engine.
type Engine ¶
type Engine struct {
// AhoCorasickCore manages the Aho-Corasick trie and related keyword lookups.
AhoCorasickCore *ahocorasick.Core
WgNotifier sync.WaitGroup
// contains filtered or unexported fields
}
Engine represents the core scanning engine responsible for detecting secrets in input data. It manages the lifecycle of the scanning process, including initialization, worker management, and result notification. The engine is designed to be flexible and configurable, allowing for customization through various options and configurations.
func (*Engine) AhoCorasickCoreKeywords ¶
AhoCorasickCoreKeywords returns a set of keywords that the engine's AhoCorasickCore is using.
func (*Engine) ChunksChan ¶
func (*Engine) DetectorAvgTime ¶
DetectorAvgTime returns the average time taken by each detector.
func (*Engine) Finish ¶
Finish waits for running sources to complete and workers to finish scanning chunks before closing their respective channels. Once Finish is called, no more sources may be scanned by the engine.
func (*Engine) GetDetectorsMetrics ¶
GetDetectorsMetrics returns a copy of the average time taken by each detector.
func (*Engine) GetMetrics ¶
GetMetrics returns a copy of Metrics. It's safe for concurrent use, and the caller can't modify the original data.
func (*Engine) HasFoundResults ¶
HasFoundResults returns true if any results are found.
func (*Engine) ResultsChan ¶
func (e *Engine) ResultsChan() chan detectors.ResultWithMetadata
func (*Engine) ScanChunk ¶
ScanChunk injects a chunk into the output stream of chunks to be scanned. This method should rarely be used. TODO(THOG-1577): Remove when dependencies no longer rely on this functionality.
func (*Engine) ScanCircleCI ¶
ScanCircleCI scans CircleCI logs.
func (*Engine) ScanConfig ¶
func (e *Engine) ScanConfig(ctx context.Context, configuredSources ...sources.ConfiguredSource) ([]sources.JobProgressRef, error)
ScanConfig starts a scan of all of the configured (but not initialized) sources and returns their job references. If there is an error during initialization or starting of the scan, an error is returned along with the references that successfully started up to that point.
func (*Engine) ScanDocker ¶
func (e *Engine) ScanDocker(ctx context.Context, c sources.DockerConfig) (sources.JobProgressRef, error)
ScanDocker scans a given docker connection.
func (*Engine) ScanElasticsearch ¶
func (e *Engine) ScanElasticsearch(ctx context.Context, c sources.ElasticsearchConfig) (sources.JobProgressRef, error)
ScanElasticsearch scans an Elasticsearch installation.
func (*Engine) ScanFileSystem ¶
func (e *Engine) ScanFileSystem(ctx context.Context, c sources.FilesystemConfig) (sources.JobProgressRef, error)
ScanFileSystem scans a given file system.
func (*Engine) ScanGitHub ¶
func (e *Engine) ScanGitHub(ctx context.Context, c sources.GithubConfig) (sources.JobProgressRef, error)
ScanGitHub scans GitHub with the provided options.
func (*Engine) ScanGitHubExperimental ¶
func (e *Engine) ScanGitHubExperimental(ctx context.Context, c sources.GitHubExperimentalConfig) (sources.JobProgressRef, error)
ScanGitHubExperimental scans GitHub using an experimental feature. Consider all functionality to be in an alpha release here.
func (*Engine) ScanGitLab ¶
func (e *Engine) ScanGitLab(ctx context.Context, c sources.GitlabConfig) (sources.JobProgressRef, error)
ScanGitLab scans GitLab with the provided configuration.
func (*Engine) ScanHuggingface ¶
func (e *Engine) ScanHuggingface(ctx context.Context, c HuggingfaceConfig) (sources.JobProgressRef, error)
ScanHuggingface scans HuggingFace with the provided options.
func (*Engine) ScanJenkins ¶
func (e *Engine) ScanJenkins(ctx context.Context, jenkinsConfig JenkinsConfig) (sources.JobProgressRef, error)
ScanJenkins scans Jenkins logs.
func (*Engine) ScanJira ¶
func (e *Engine) ScanJira(ctx context.Context, c sources.JiraConfig) (sources.JobProgressRef, error)
ScanJira scans a given Jira instance (supports both Cloud and Server/Data Center).
func (*Engine) ScanPostman ¶
func (e *Engine) ScanPostman(ctx context.Context, c sources.PostmanConfig) (sources.JobProgressRef, error)
ScanPostman scans Postman with the provided options.
func (*Engine) ScanStdinInput ¶
func (e *Engine) ScanStdinInput(ctx context.Context, c sources.StdinConfig) (sources.JobProgressRef, error)
ScanStdinInput scans input that is piped into the application.
func (*Engine) ScanSyslog ¶
func (e *Engine) ScanSyslog(ctx context.Context, c sources.SyslogConfig) (sources.JobProgressRef, error)
ScanSyslog scans syslog files.
func (*Engine) ScanTravisCI ¶
ScanTravisCI scans TravisCI logs.
type HuggingfaceConfig ¶
type HuggingfaceConfig struct {
Endpoint string
Models []string
Spaces []string
Datasets []string
Organizations []string
Users []string
IncludeModels []string
IgnoreModels []string
IncludeSpaces []string
IgnoreSpaces []string
IncludeDatasets []string
IgnoreDatasets []string
SkipAllModels bool
SkipAllSpaces bool
SkipAllDatasets bool
IncludeDiscussions bool
IncludePrs bool
Token string
Concurrency int
}
HuggingfaceConfig represents the configuration for HuggingFace.
type JenkinsConfig ¶
type Metrics ¶
type Metrics struct {
BytesScanned uint64
ChunksScanned uint64
VerifiedSecretsFound uint64
UnverifiedSecretsFound uint64
AvgDetectorTime map[string]time.Duration
ScanDuration time.Duration
// contains filtered or unexported fields
}
Metrics for the scan engine for external consumption.
type Printer ¶
type Printer interface {
Print(ctx context.Context, r *detectors.ResultWithMetadata) error
}
Printer is used to format found results and output them to the user (e.g., as JSON or plain text). Please note that Printer implementations SHOULD BE thread safe.
type PrinterDispatcher ¶
type PrinterDispatcher struct {
// contains filtered or unexported fields
}
PrinterDispatcher wraps an existing Printer implementation and adapts it to the ResultsDispatcher interface.
func NewPrinterDispatcher ¶
func NewPrinterDispatcher(printer Printer) *PrinterDispatcher
NewPrinterDispatcher creates a new PrinterDispatcher instance with the provided Printer.
func (*PrinterDispatcher) Dispatch ¶
func (p *PrinterDispatcher) Dispatch(ctx context.Context, result detectors.ResultWithMetadata) error
Dispatch sends the result to the printer.
type ResultsDispatcher ¶
type ResultsDispatcher interface {
Dispatch(ctx context.Context, result detectors.ResultWithMetadata) error
}
ResultsDispatcher is an interface for dispatching detected results. Implementations can vary from printing results to the console to sending results to an external system.