Documentation
¶
Index ¶
- func AnalyzeProfiles(logger *multilog.Logger, opts ProfileOptions)
- func CalculateChecksum(logger *multilog.Logger, filePath string, algo string) string
- func CalculateChecksumFromContent(content []byte, algo string) string
- func CapPreallocEntries(estimated int) int
- func CaseInsensitiveLess(a, b string) bool
- func CloseBody(logger *multilog.Logger, body io.Closer)
- func CloseFile(logger *multilog.Logger, file *os.File)
- func CopyFile(logger *multilog.Logger, src, dst string) error
- func CopySourceToTarget(logger *multilog.Logger, target c.DownloadTarget) error
- func DetermineSummaryTypeFromPath(path string) string
- func EnsureDirectoryExists(logger *multilog.Logger, dir string) error
- func ExpandIpv4Cidr(logger *multilog.Logger, cidr string) ([]string, error)
- func ExpandIpv4Range(logger *multilog.Logger, ipRange string) []string
- func ExtractArchive(logger *multilog.Logger, archivePath, destFolder string) error
- func ExtractDomains(content string) ([]string, []string)
- func ExtractEntriesWithRegex(content string, regex *regexp.Regexp) ([]string, []string)
- func FindOverlap(logger *multilog.Logger, file1, file2 string) ([]string, int, int)
- func FindProjectRoot(startDir string) (string, error)
- func ForceCopySourceToTarget(logger *multilog.Logger, target c.DownloadTarget) error
- func FormatNameCounts(m map[string]int) []string
- func GetArchiveExtension(uri string) string
- func GetFileLastModifiedTime(logger *multilog.Logger, filePath string) (string, error)
- func GetFilesFromSummaries[T any](summaries []T, summaryType string) map[string]T
- func GetFilesInDir(logger *multilog.Logger, dir string, patterns []string) ([]string, error)
- func GetFoldersToArchive(logger *multilog.Logger, folders map[string]string) map[string]string
- func GetLastSummary[T any](logger *multilog.Logger, summaryFile string, sourceName string) (T, error)
- func GetMapKeys[K comparable, V any](m map[K]V) []K
- func GetSummaryFiles[T any](logger *multilog.Logger, sourceType string, summaryFile string, ...) ([]string, error)
- func GetSummaryTypeFromFolder(folderName string) string
- func GetTimestamp() string
- func GetUserAgent(logger *multilog.Logger, appName string, appVersion string, ...) string
- func IsAlphanumericWithUnderscoresAndDashes(s string) bool
- func IsArchive(filePath string) bool
- func IsCIDR(line string) bool
- func IsComment(line string) bool
- func IsDomain(domain string) bool
- func IsIP(line string) bool
- func IsIPv4(line string) bool
- func IsIPv6(line string) bool
- func IsSkipIP(_ *multilog.Logger, ip string) bool
- func LogMemStats(logger *multilog.Logger, prefix string)
- func ParsePercent(percentStr string) float64
- func PickRandomLines(filePath string, maxLines int) ([]string, error)
- func ReadEntriesFromFile(logger *multilog.Logger, filepath string) ([]string, int, error)
- func ReadEntriesFromFileWithPool(logger *multilog.Logger, filepath string, pool *DTEntryPool) ([]string, int, error)
- func RemoveDuplicates(entries []string) []string
- func ResolveDomainsToIPv4(logger *multilog.Logger, domains []string) ([]string, []string)
- func SaveFile(logger *multilog.Logger, destFolder, fileName string, reader io.Reader) (string, error)
- func SaveSummaries[T any](logger *multilog.Logger, summaries []T, summaryFile string, ...) (int, error)
- func SaveSummary[T any](logger *multilog.Logger, summary T, summaryFile string, ...) int
- func ShouldDownloadSource(logger *multilog.Logger, summaryFile string, sourceName string) bool
- func ShouldDownloadSourceInfo(logger *multilog.Logger, summaryFile string, sourceName string) (bool, string, time.Time, time.Duration)
- func SortCaseInsensitiveStrings(items []string)
- func StartProfiling(logger *multilog.Logger, opts ProfileOptions) func()
- func StringInSlice(str string, slice []string) bool
- func WriteEntriesToFile(logger *multilog.Logger, filepath string, entries []string) error
- func WriteValidEntriesToFile(logger *multilog.Logger, filepath string, entries []string) error
- type DTEntryPool
- type PoolStats
- type ProfileOptions
- type StringSet
- func (s StringSet) Add(str string)
- func (s StringSet) AddAll(entries []string, consider bool) int
- func (s StringSet) AddWithConsider(str string, consider bool) bool
- func (s StringSet) Contains(str string) bool
- func (s StringSet) Get(str string) (bool, bool)
- func (s StringSet) MustConsider(str string) bool
- func (s StringSet) Remove(str string)
- func (s StringSet) RemoveAll(entries []string)
- func (s StringSet) Size() int
- func (s StringSet) ToSlice() []string
- func (s StringSet) ToSliceSorted() []string
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func AnalyzeProfiles ¶
func AnalyzeProfiles(logger *multilog.Logger, opts ProfileOptions)
AnalyzeProfiles runs pprof analysis on generated profiles and saves the results to text files. This is useful for automated analysis of profiles without requiring manual pprof commands.
func CalculateChecksum ¶
CalculateChecksum calculates the checksum of the specified file using the specified algorithm. Supports MD5 and SHA256 algorithms. If the algorithm is empty, it defaults to MD5.
Parameters:
- logger: Logger for recording operations and errors
- filePath: Path to the file to calculate the checksum for
- algo: Algorithm to use ("md5" or "sha256")
Returns:
- A hex string representation of the checksum or an empty string on error
func CalculateChecksumFromContent ¶
CalculateChecksumFromContent calculates the checksum of the provided content using the specified algorithm. Supports MD5 and SHA256 algorithms. If the algorithm is empty, it defaults to MD5.
Parameters:
- content: The byte slice to calculate the checksum for
- algo: Algorithm to use ("md5" or "sha256")
Returns:
- A hex string representation of the checksum
func CapPreallocEntries ¶
CapPreallocEntries limits the estimated entries to avoid excessive or insufficient allocation.
func CaseInsensitiveLess ¶ added in v0.4.0
CaseInsensitiveLess returns true if a < b using case-insensitive comparison.
func CloseFile ¶
CloseFile safely closes the given file and logs an error if it fails.
Parameters:
- logger: Logger for recording errors
- file: The file handle to close
func CopySourceToTarget ¶
func CopySourceToTarget(logger *multilog.Logger, target c.DownloadTarget) error
func DetermineSummaryTypeFromPath ¶
DetermineSummaryTypeFromPath determines the summary type based on a file path
func EnsureDirectoryExists ¶
EnsureDirectoryExists creates a directory if it doesn't exist
func ExpandIpv4Cidr ¶ added in v0.2.0
ExpandIpv4Cidr expands a CIDR notation IPv4 address into individual addresses.
Parameters:
- logger: Logger for recording operations and errors
- cidr: A string containing an IPv4 address in CIDR notation
Returns:
- A slice of strings with all individual IP addresses in the CIDR range
func ExpandIpv4Range ¶ added in v0.2.0
ExpandIpv4Range expands a range of IPv4 addresses into individual addresses. The expected format is "startIP-endIP", e.g., "194.180.49.0-194.180.49.255".
Parameters:
- logger: Logger for recording operations and errors
- ipRange: A string containing the IP range in the format "startIP-endIP"
Returns:
- A slice of strings with all individual IP addresses in the range (inclusive)
func ExtractArchive ¶
ExtractArchive extracts the contents of an archive file (either .tar.gz or .zip) to the specified destination folder.
Parameters:
- archivePath: Path to the archive file
- destFolder: Destination directory where the contents will be extracted
Returns:
- An error object if the extraction fails, nil on success
func ExtractDomains ¶ added in v1.0.4
ExtractDomains parses content line by line, using IsDomain
func ExtractEntriesWithRegex ¶ added in v0.4.0
ExtractEntriesWithRegex extracts entries from content using a regex pattern. Lines that match the regex are considered valid, others invalid.
Parameters:
- content: The content to process
- regex: The regex pattern to match against
Returns:
- A slice of valid entries (match the regex)
- A slice of invalid entries (don't match the regex)
func FindOverlap ¶
FindOverlap finds the overlap between two files and returns the overlapping content. It reads both files, ignores comments, and identifies common lines.
Parameters:
- logger: Logger for recording operations and errors
- file1: Path to the first file
- file2: Path to the second file
Returns:
- A slice of strings representing the overlapping content
- The number of non-comment lines in file1
- The number of non-comment lines in file2
Optimized FindOverlap: uses bufio.Scanner and fast comment detection for performance
func FindProjectRoot ¶
FindProjectRoot walks up the directory tree to find the project root (the directory containing go.mod). If startDir is empty, it uses the current working directory.
func ForceCopySourceToTarget ¶ added in v0.3.0
func ForceCopySourceToTarget(logger *multilog.Logger, target c.DownloadTarget) error
func FormatNameCounts ¶ added in v0.4.0
FormatNameCounts returns "name (count)" sorted case-insensitively by name.
func GetArchiveExtension ¶
func GetFileLastModifiedTime ¶
GetFileLastModifiedTime retrieves the last modified time of a file and formats it according to the application's standard timestamp format.
func GetFilesFromSummaries ¶
func GetFilesInDir ¶ added in v1.0.0
func GetFoldersToArchive ¶
GetFoldersToArchive returns a map of folders that should be archived
func GetLastSummary ¶
func GetMapKeys ¶
func GetMapKeys[K comparable, V any](m map[K]V) []K
GetMapKeys returns the keys of a map as a slice. This is a generic function that works with any map that has comparable keys.
Type Parameters:
- K: The type of map keys (must be comparable)
- V: The type of map values
Parameters:
- m: The map to extract keys from
Returns:
- A slice containing all the keys in the map
func GetSummaryFiles ¶
func GetSummaryFiles[T any]( logger *multilog.Logger, sourceType string, summaryFile string, extractFiles func(T, string) []string, ) ([]string, error)
GetSummaryFiles is a generic function that retrieves files from a summary file
func GetSummaryTypeFromFolder ¶
GetSummaryTypeFromFolder determines the summary type based on the folder name
func GetTimestamp ¶
func GetTimestamp() string
GetTimestamp returns the current time formatted according to the application's standard timestamp format.
Returns:
- A string representation of the current time
func GetUserAgent ¶
func GetUserAgent(logger *multilog.Logger, appName string, appVersion string, appDescription string) string
GetUserAgent constructs a User-Agent string for HTTP requests based on application information. If the application name or version is not provided, defaults will be used.
Parameters:
- logger: Logger for recording operations and errors
- appName: The name of the application
- appVersion: The version of the application
- appDescription: Optional description of the application
Returns:
- A formatted User-Agent string
func IsAlphanumericWithUnderscoresAndDashes ¶
IsAlphanumericWithUnderscoresAndDashes checks if a string contains only alphanumeric characters, underscores, and dashes.
func IsCIDR ¶
IsCIDR checks if a string is a valid CIDR notation address.
Parameters:
- line: The string to check
Returns:
- true if the string is a valid CIDR address, false otherwise
func IsComment ¶
IsComment determines if a line is a comment or an empty line. A line is considered a comment if it's empty or starts with any of the common comment prefixes.
Parameters:
- line: The string to check
Returns:
- true if the line is a comment or empty, false otherwise
func IsDomain ¶
IsDomain checks if a string is a valid domain name. It uses the domain regex pattern and also ensures the string is not an IP address.
Parameters:
- domain: The string to check
Returns:
- true if the string is a valid domain name, false otherwise
func IsIP ¶
IsIP checks if a string is a valid IP address (IPv4 or IPv6).
Parameters:
- line: The string to check
Returns:
- true if the string is a valid IP address, false otherwise
func IsIPv4 ¶ added in v0.4.0
IsIPv4 checks if a string is a valid IPv4 address.
Parameters:
- line: The string to check
Returns:
- true if the string is a valid IPv4 address, false otherwise
func IsIPv6 ¶ added in v0.3.0
IsIPv6 checks if a string is a valid IPv6 address.
Parameters:
- line: The string to check
Returns:
- true if the string is a valid IPv6 address, false otherwise
func LogMemStats ¶
func ParsePercent ¶
ParsePercent converts a percentage string to a float64 for comparison
func PickRandomLines ¶
PickRandomLines reads a file and returns a specified number of random lines from it. If maxLines is 0, it returns all the lines from the file (excluding comments).
Parameters:
- filePath: Path to the file to read
- maxLines: Maximum number of lines to return (0 for all)
Returns:
- A slice of strings with the selected lines
- An error object if reading fails, nil on success
func ReadEntriesFromFile ¶
ReadEntriesFromFile reads entries from a file and returns them as a slice of strings. Comments are ignored, and duplicate entries are removed.
Parameters:
- filepath: Path to the file to read
Returns:
- A slice of strings containing the unique entries
- Number of duplicate entries found
- An error object if reading fails, nil on success
func ReadEntriesFromFileWithPool ¶
func ReadEntriesFromFileWithPool(logger *multilog.Logger, filepath string, pool *DTEntryPool) ([]string, int, error)
ReadEntriesFromFileWithPool reads entries from a file using a string intern pool.
Parameters:
- logger: Logger for recording operations and errors
- filepath: Path to the file to read
- pool: Optional string intern pool for memory optimization (may be nil)
Returns:
- A slice of strings containing the unique entries
- Number of duplicate entries found
- An error object if reading fails, nil on success
func RemoveDuplicates ¶
func ResolveDomainsToIPv4 ¶ added in v0.4.0
func SaveFile ¶
func SaveFile(logger *multilog.Logger, destFolder, fileName string, reader io.Reader) (string, error)
SaveFile saves the content from the reader to the specified destination folder and file name. It creates the destination folder if it doesn't exist.
Parameters:
- logger: Logger for recording operations and errors
- destFolder: Target directory where the file will be saved
- fileName: Name of the file to create
- reader: Source of content to be saved
Returns:
- The absolute path of the saved file or an empty string on error
- An error object if the operation failed, nil on success
func SaveSummaries ¶
func SaveSummaries[T any]( logger *multilog.Logger, summaries []T, summaryFile string, lessFunc func(i, j T) bool, ) (int, error)
SaveSummaries saves multiple summary objects to a JSON file. The summaries are sorted using the provided comparison function before being saved. If the summary file already exists, it will be archived with a timestamp suffix. Small elements are written compactly while preserving readability.
Type Parameters:
- T: The type of the summary objects
Parameters:
- logger: Logger for recording operations and errors
- summaries: Slice of summary objects to save
- summaryFile: Path where the summaries will be saved
- lessFunc: Function for sorting summaries
Returns:
- The number of summaries written to the file
- An error object if the operation failed, nil on success
func SaveSummary ¶
func SaveSummary[T any]( logger *multilog.Logger, summary T, summaryFile string, lessFunc func(i, j T) bool, ) int
SaveSummary saves a single summary object to the specified file. This is a convenience function that calls SaveSummaries with a slice containing only the provided summary.
Type Parameters:
- T: The type of the summary object
Parameters:
- logger: Logger for recording operations and errors
- summary: The summary object to save
- summaryFile: Path where the summary will be saved
- lessFunc: Function for sorting summaries
Returns:
- The number of summaries written to the file
func ShouldDownloadSource ¶
func ShouldDownloadSourceInfo ¶ added in v1.0.1
func ShouldDownloadSourceInfo( logger *multilog.Logger, summaryFile string, sourceName string, ) (bool, string, time.Time, time.Duration)
ShouldDownloadSourceInfo checks if a fresh download should occur for the given source.
Returns:
- shouldDownload: whether a fresh download should occur
- frequencyLabel: daily, weekly or monthly
- lastDownloadTime: zero time if not available
- remaining: duration until next allowed download (zero if shouldDownload == true)
func SortCaseInsensitiveStrings ¶ added in v0.4.0
func SortCaseInsensitiveStrings(items []string)
SortCaseInsensitiveStrings sorts the provided slice of strings in-place using a case-insensitive order.
func StartProfiling ¶
func StartProfiling(logger *multilog.Logger, opts ProfileOptions) func()
StartProfiling starts profiling based on the provided options. It returns a function that should be deferred to stop profiling and create profiles if enabled.
func StringInSlice ¶
func WriteEntriesToFile ¶
WriteEntriesToFile writes the given entries to the specified file. The entries are sorted before writing, and each entry is written on a separate line.
Parameters:
- logger: Logger for recording operations and errors
- filepath: Path to the file to write
- entries: Slice of strings to write
Returns:
- An error object if writing fails, nil on success
func WriteValidEntriesToFile ¶ added in v1.0.1
WriteValidEntriesToFile filters out comments from the entries and writes the valid entries to the specified file.
Parameters:
- logger: Logger for recording operations and errors
- filepath: Path to the file to write
- entries: Slice of strings to filter and write
Returns:
- An error object if writing fails, nil on success
Types ¶
type DTEntryPool ¶
type DTEntryPool struct {
// contains filtered or unexported fields
}
DTEntryPool is a string interning pool that allows for efficient storage and retrieval of DNS entries.
func NewDTEntryPool ¶
func NewDTEntryPool() *DTEntryPool
NewDTEntryPool creates a new string interning pool.
func (*DTEntryPool) Intern ¶
func (p *DTEntryPool) Intern(s string) string
Intern adds a string to the pool if it is not already present and returns the interned string.
func (*DTEntryPool) InternMany ¶
func (p *DTEntryPool) InternMany(strings []string) []string
InternMany interns multiple strings in a batch
func (*DTEntryPool) Size ¶
func (p *DTEntryPool) Size() int
Size returns the number of strings in the pool.
func (*DTEntryPool) Stats ¶
func (p *DTEntryPool) Stats() PoolStats
Stats returns the current hit/miss statistics for the pool.
type PoolStats ¶
type PoolStats struct {
// contains filtered or unexported fields
}
PoolStats tracks performance statistics for the entry pool.
type ProfileOptions ¶
type ProfileOptions struct {
ProfileNameBase string
OutputDir string
BlockProfileRate int
CPUProfile bool
MemProfile bool
GoroutineProfile bool
BlockProfile bool
}
ProfileOptions defines the configuration for profiling
type StringSet ¶
func NewStringSet ¶
func (StringSet) AddAll ¶
AddAll adds all entries to the set, optionally considering existing entries. It returns the number of entries that were already in the set.
func (StringSet) AddWithConsider ¶
AddWithConsider adds a string to the set with the specified consider flag. It returns true if the entry was added (not found in the set). If the entry is already in the set, it updates the consider flag.