Documentation
¶
Overview ¶
ABOUTME: Helper functions for optimized DOM operations using the existing cache system. These functions provide convenient wrappers around the cache API for common DOM operations.
Index ¶
- Variables
- func BatchCachedFind(element *goquery.Selection, selectors []string) map[string]*goquery.Selection
- func CachedAttr(element *goquery.Selection, attrName string) (string, bool)
- func CachedFind(element *goquery.Selection, selector string) *goquery.Selection
- func CachedHasClass(element *goquery.Selection, className string) bool
- func CachedText(element *goquery.Selection) string
- func OptimizedLinkDensity(element *goquery.Selection) float64
- type CacheEntry
- type CacheManager
- type CacheStats
- type CachedElementOperations
- func (ceo *CachedElementOperations) BatchCachedFind(element *goquery.Selection, selectors []string) map[string]*goquery.Selection
- func (ceo *CachedElementOperations) CachedAttr(element *goquery.Selection, attrName string) (string, bool)
- func (ceo *CachedElementOperations) CachedChildren(element *goquery.Selection) *goquery.Selection
- func (ceo *CachedElementOperations) CachedFind(element *goquery.Selection, selector string) *goquery.Selection
- func (ceo *CachedElementOperations) CachedHasClass(element *goquery.Selection, className string) bool
- func (ceo *CachedElementOperations) CachedParent(element *goquery.Selection) *goquery.Selection
- func (ceo *CachedElementOperations) CachedText(element *goquery.Selection) string
- func (ceo *CachedElementOperations) ClearElementCache()
- func (ceo *CachedElementOperations) GetCacheStats() CacheStats
- func (ceo *CachedElementOperations) OptimizedLinkDensity(element *goquery.Selection) float64
- type DOMCache
- func (dc *DOMCache) CleanupExpired() int
- func (dc *DOMCache) Clear()
- func (dc *DOMCache) GetAttribute(documentHash, selector, attribute string) (string, bool)
- func (dc *DOMCache) GetSelectorResult(key SelectorCacheKey) (*goquery.Selection, bool)
- func (dc *DOMCache) GetStats() CacheStats
- func (dc *DOMCache) GetTextContent(documentHash, selector string) (string, bool)
- func (dc *DOMCache) SetAttribute(documentHash, selector, attribute, value string, ttl time.Duration)
- func (dc *DOMCache) SetSelectorResult(key SelectorCacheKey, selection *goquery.Selection, ttl time.Duration)
- func (dc *DOMCache) SetTextContent(documentHash, selector, text string, ttl time.Duration)
- type ExtractionCache
- func (ec *ExtractionCache) CleanupExpired() int
- func (ec *ExtractionCache) GetExtractionResult(url string) (interface{}, bool)
- func (ec *ExtractionCache) GetFieldResult(url, field string) (interface{}, bool)
- func (ec *ExtractionCache) GetStats() CacheStats
- func (ec *ExtractionCache) SetExtractionResult(url string, result interface{}, ttl time.Duration)
- func (ec *ExtractionCache) SetFieldResult(url, field string, result interface{}, ttl time.Duration)
- type SelectorCacheKey
Constants ¶
This section is empty.
Variables ¶
var ( GlobalDOMCache = NewDOMCache() GlobalExtractionCache = NewExtractionCache() )
Global cache instances
var GlobalCachedOps = NewCachedElementOperations()
Global cached operations instance
Functions ¶
func BatchCachedFind ¶
func CachedFind ¶
Global helper functions for easy access
func CachedText ¶
func OptimizedLinkDensity ¶
Types ¶
type CacheEntry ¶
type CacheEntry struct { Value interface{} `json:"value"` CreatedAt time.Time `json:"created_at"` AccessCount int64 `json:"access_count"` LastAccess time.Time `json:"last_access"` TTL time.Duration `json:"ttl,omitempty"` }
CacheEntry represents a cached item with metadata
type CacheManager ¶
type CacheManager struct {
// contains filtered or unexported fields
}
CacheManager coordinates multiple cache types
func NewCacheManager ¶
func NewCacheManager(cleanupInterval time.Duration) *CacheManager
NewCacheManager creates a new cache manager with automatic cleanup
func (*CacheManager) GetAllStats ¶
func (cm *CacheManager) GetAllStats() map[string]CacheStats
GetAllStats returns statistics for all caches
func (*CacheManager) GetDOMCache ¶
func (cm *CacheManager) GetDOMCache() *DOMCache
GetDOMCache returns the DOM cache instance
func (*CacheManager) GetExtractionCache ¶
func (cm *CacheManager) GetExtractionCache() *ExtractionCache
GetExtractionCache returns the extraction cache instance
func (*CacheManager) Stop ¶
func (cm *CacheManager) Stop()
Stop stops the cache manager and cleanup goroutine
type CacheStats ¶
type CacheStats struct { Hits int64 `json:"hits"` Misses int64 `json:"misses"` Sets int64 `json:"sets"` Evictions int64 `json:"evictions"` HitRatio float64 `json:"hit_ratio"` TotalEntries int64 `json:"total_entries"` MemoryUsageKB int64 `json:"memory_usage_kb"` LastCleanup time.Time `json:"last_cleanup"` }
CacheStats tracks cache performance metrics
type CachedElementOperations ¶
type CachedElementOperations struct {
// contains filtered or unexported fields
}
CachedElementOperations provides optimized DOM operations using the existing cache system
func NewCachedElementOperations ¶
func NewCachedElementOperations() *CachedElementOperations
NewCachedElementOperations creates a new cached element operations helper
func (*CachedElementOperations) BatchCachedFind ¶
func (ceo *CachedElementOperations) BatchCachedFind(element *goquery.Selection, selectors []string) map[string]*goquery.Selection
BatchCachedFind performs multiple selector queries efficiently
func (*CachedElementOperations) CachedAttr ¶
func (ceo *CachedElementOperations) CachedAttr(element *goquery.Selection, attrName string) (string, bool)
CachedAttr gets cached attribute value for an element
func (*CachedElementOperations) CachedChildren ¶
func (ceo *CachedElementOperations) CachedChildren(element *goquery.Selection) *goquery.Selection
CachedChildren gets cached children for an element
func (*CachedElementOperations) CachedFind ¶
func (ceo *CachedElementOperations) CachedFind(element *goquery.Selection, selector string) *goquery.Selection
CachedFind performs a cached selector query
func (*CachedElementOperations) CachedHasClass ¶
func (ceo *CachedElementOperations) CachedHasClass(element *goquery.Selection, className string) bool
CachedHasClass checks if an element has a specific class using cached attributes
func (*CachedElementOperations) CachedParent ¶
func (ceo *CachedElementOperations) CachedParent(element *goquery.Selection) *goquery.Selection
CachedParent gets cached parent for an element
func (*CachedElementOperations) CachedText ¶
func (ceo *CachedElementOperations) CachedText(element *goquery.Selection) string
CachedText gets cached text content for an element
func (*CachedElementOperations) ClearElementCache ¶
func (ceo *CachedElementOperations) ClearElementCache()
ClearElementCache clears the cache for better memory management
func (*CachedElementOperations) GetCacheStats ¶
func (ceo *CachedElementOperations) GetCacheStats() CacheStats
GetCacheStats returns cache performance statistics
func (*CachedElementOperations) OptimizedLinkDensity ¶
func (ceo *CachedElementOperations) OptimizedLinkDensity(element *goquery.Selection) float64
OptimizedLinkDensity calculates link density using cached operations
type DOMCache ¶
type DOMCache struct {
// contains filtered or unexported fields
}
DOMCache provides thread-safe caching for DOM-related operations
func (*DOMCache) CleanupExpired ¶
CleanupExpired removes expired cache entries
func (*DOMCache) GetAttribute ¶
GetAttribute retrieves cached attribute values
func (*DOMCache) GetSelectorResult ¶
func (dc *DOMCache) GetSelectorResult(key SelectorCacheKey) (*goquery.Selection, bool)
GetSelectorResult retrieves cached selector query results
func (*DOMCache) GetStats ¶
func (dc *DOMCache) GetStats() CacheStats
GetStats returns current cache statistics
func (*DOMCache) GetTextContent ¶
GetTextContent retrieves cached text extraction results
func (*DOMCache) SetAttribute ¶
func (dc *DOMCache) SetAttribute(documentHash, selector, attribute, value string, ttl time.Duration)
SetAttribute caches attribute values
func (*DOMCache) SetSelectorResult ¶
func (dc *DOMCache) SetSelectorResult(key SelectorCacheKey, selection *goquery.Selection, ttl time.Duration)
SetSelectorResult caches selector query results
type ExtractionCache ¶
type ExtractionCache struct {
// contains filtered or unexported fields
}
ExtractionCache provides thread-safe caching for extraction results
func NewExtractionCache ¶
func NewExtractionCache() *ExtractionCache
NewExtractionCache creates a new extraction result cache
func (*ExtractionCache) CleanupExpired ¶
func (ec *ExtractionCache) CleanupExpired() int
CleanupExpired removes expired cache entries from the ExtractionCache
func (*ExtractionCache) GetExtractionResult ¶
func (ec *ExtractionCache) GetExtractionResult(url string) (interface{}, bool)
GetExtractionResult retrieves cached extraction results for a URL
func (*ExtractionCache) GetFieldResult ¶
func (ec *ExtractionCache) GetFieldResult(url, field string) (interface{}, bool)
GetFieldResult retrieves cached field extraction results
func (*ExtractionCache) GetStats ¶
func (ec *ExtractionCache) GetStats() CacheStats
GetStats returns cache statistics for ExtractionCache
func (*ExtractionCache) SetExtractionResult ¶
func (ec *ExtractionCache) SetExtractionResult(url string, result interface{}, ttl time.Duration)
SetExtractionResult caches extraction results for a URL
func (*ExtractionCache) SetFieldResult ¶
func (ec *ExtractionCache) SetFieldResult(url, field string, result interface{}, ttl time.Duration)
SetFieldResult caches individual field extraction results
type SelectorCacheKey ¶
type SelectorCacheKey struct { DocumentHash string Selector string Operation string // "find", "text", "attr", etc. Attribute string // for attribute operations }
SelectorCacheKey identifies a cached selector operation by document hash, selector, operation, and (for attribute operations) attribute name
func (SelectorCacheKey) String ¶
func (key SelectorCacheKey) String() string
String creates a fast hash-based cache key