Documentation
¶
Index ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type Document ¶
// Document provides methods for processing HTML documents: reading the
// title, classifying indexability, stripping scripts, and extracting SEO
// metadata.
type Document interface {
// Title returns the page title taken from the document's <title> tag.
// It returns the empty string when no title is found.
// The result is truncated to 200 characters, counted in runes rather
// than bytes.
Title() string
// IndexationStatus determines whether the page is indexable.
// Conditions are evaluated in priority order, highest first:
// non-200 status > blocked by meta > non-canonical > indexable.
// NOTE(review): statusCode is presumably the HTTP response status and
// finalURL the URL checked against the page's canonical URL — confirm
// against the implementation.
IndexationStatus(statusCode int, finalURL string) types.IndexStatus
// CleanScripts removes executable script elements.
// It returns true if at least one element was removed.
// NOTE(review): presumably mutates the underlying DOM in place, so later
// HTML() calls reflect the removal — confirm against the implementation.
CleanScripts() bool
// GoQueryDoc returns the underlying goquery Document for advanced
// queries that this interface does not cover.
GoQueryDoc() *goquery.Document
// HTML returns the current HTML as bytes, re-serialized from the DOM.
HTML() []byte
// ExtractPageSEO extracts comprehensive SEO metadata from the document.
// statusCode and pageURL are required because the extraction includes
// the IndexationStatus calculation.
ExtractPageSEO(statusCode int, pageURL string) *types.PageSEO
}
Document provides methods for processing HTML documents.
func ParseWithDOM ¶
ParseWithDOM parses HTML bytes into a Document using DOM parsing.
Click to show internal directories.
Click to hide internal directories.