Versions in this module

v1

v1.1.1 (Feb 7, 2026)

Changes in this version

+ const ImageDir
+ func CountTokens(text string) int
+ func ExtractHeadings(markdown []byte) []string
+ func InitTokenizer(encoding string) error
+ func IsInitialized() bool
+ type Chunk struct
    + Content string
    + HeadingHierarchy []string
    + TokenCount int
    + func ChunkMarkdown(markdown string, cfg ChunkerConfig) ([]Chunk, error)
+ type ChunkerConfig struct
    + ChunkOverlap int
    + MaxChunkSize int
    + func DefaultChunkerConfig() ChunkerConfig
+ type ContentProcessor struct
    + func NewContentProcessor(imgProcessor *ImageProcessor, appCfg config.AppConfig, log *logrus.Logger) *ContentProcessor
    + func (cp *ContentProcessor) ExtractProcessAndSaveContent(doc *goquery.Document, finalURL *url.URL, siteCfg config.SiteConfig, ...) (pageTitle string, savedFilePath string, err error)
+ type ImageDownloadTask struct
    + AbsImgURL string
    + BaseImgURL *url.URL
    + Ctx context.Context
    + ExtractedCaption string
    + ImgHost string
    + ImgLogEntry *logrus.Entry
    + NormImgURL string
+ type ImageProcessor struct
    + func NewImageProcessor(store storage.ImageStore, fetcher *fetch.Fetcher, ...) *ImageProcessor
    + func (ip *ImageProcessor) ProcessImages(mainContent *goquery.Selection, finalURL *url.URL, siteCfg config.SiteConfig, ...) (imageMap map[string]models.ImageData, imageErrs []error)
+ type LinkProcessor struct
    + func NewLinkProcessor(store storage.PageStore, pq *queue.ThreadSafePriorityQueue, ...) *LinkProcessor
    + func (lp *LinkProcessor) ExtractAndQueueLinks(originalDoc *goquery.Document, finalURL *url.URL, currentDepth int, ...) (queuedCount int, err error)