constants

package
v1.2.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Sep 11, 2025 License: MIT Imports: 0 Imported by: 0

Documentation

Overview

Package constants provides configuration constants and selectors for the defuddle content extraction system. It includes CSS selectors for finding main content, removing unwanted elements, and processing footnotes.

Index

Constants

View Source
// MobileWidth is the width threshold for mobile styles.
const MobileWidth = 600

MobileWidth is the width threshold for mobile styles. JavaScript original code: export const MOBILE_WIDTH = 600;

Variables

View Source
// AllowedAttributes is the set of attribute names to keep. It is ported from
// the ALLOWED_ATTRIBUTES set of the original JavaScript code; every listed
// name maps to true.
var AllowedAttributes = func() map[string]bool {
	names := []string{
		"alt", "allow", "allowfullscreen", "aria-label", "checked", "colspan",
		"controls", "data-latex", "data-src", "data-srcset", "data-lang", "dir",
		"display", "frameborder", "headers", "height", "href", "lang",
		"role", "rowspan", "src", "srcset", "title", "type",
		"width",

		// MathML attributes.
		"accent", "accentunder", "align", "columnalign", "columnlines", "columnspacing",
		"columnspan", "data-mjx-texclass", "depth", "displaystyle", "fence", "frame",
		"framespacing", "linethickness", "lspace", "mathsize", "mathvariant", "maxsize",
		"minsize", "movablelimits", "notation", "rowalign", "rowlines", "rowspacing",
		"rspace", "scriptlevel", "separator", "stretchy", "symmetric", "voffset",
		"xmlns",
	}

	set := make(map[string]bool, len(names))
	for _, name := range names {
		set[name] = true
	}
	return set
}()

AllowedAttributes are attributes to keep. JavaScript original code: export const ALLOWED_ATTRIBUTES = new Set([

'alt',
'allow',
'allowfullscreen',
'aria-label',
'checked',
'colspan',
'controls',
'data-latex',
'data-src',
'data-srcset',
'data-lang',
'dir',
'display',
'frameborder',
'headers',
'height',
'href',
'lang',
'role',
'rowspan',
'src',
'srcset',
'title',
'type',
'width',

// MathML attributes
'accent',
'accentunder',
'align',
'columnalign',
'columnlines',
'columnspacing',
'columnspan',
'data-mjx-texclass',
'depth',
'displaystyle',
'fence',
'frame',
'framespacing',
'linethickness',
'lspace',
'mathsize',
'mathvariant',
'maxsize',
'minsize',
'movablelimits',
'notation',
'rowalign',
'rowlines',
'rowspacing',
'rowspan',
'rspace',
'scriptlevel',
'separator',
'stretchy',
'symmetric',
'voffset',
'xmlns'

]);

View Source
// AllowedAttributesDebug is the set of additional attribute names to keep in
// debug mode.
var AllowedAttributesDebug = map[string]bool{"class": true, "id": true}

AllowedAttributesDebug are additional attributes to keep in debug mode. JavaScript original code: export const ALLOWED_ATTRIBUTES_DEBUG = new Set([

'class',
'id',

]);

View Source
// AllowedEmptyElements is the set of element names that are allowed to be
// empty; these are not removed even if they have no content. Every listed
// name maps to true.
var AllowedEmptyElements = func() map[string]bool {
	names := []string{
		"area", "audio", "base", "br", "circle", "col",
		"defs", "ellipse", "embed", "figure", "g", "hr",
		"iframe", "img", "input", "line", "link", "mask",
		"meta", "object", "param", "path", "pattern", "picture",
		"polygon", "polyline", "rect", "source", "stop", "svg",
		"td", "th", "track", "use", "video", "wbr",
	}

	set := make(map[string]bool, len(names))
	for _, name := range names {
		set[name] = true
	}
	return set
}()

AllowedEmptyElements are elements that are allowed to be empty. These are not removed even if they have no content. JavaScript original code: export const ALLOWED_EMPTY_ELEMENTS = new Set([

'area',
'audio',
'base',
'br',
'circle',
'col',
'defs',
'ellipse',
'embed',
'figure',
'g',
'hr',
'iframe',
'img',
'input',
'line',
'link',
'mask',
'meta',
'object',
'param',
'path',
'pattern',
'picture',
'polygon',
'polyline',
'rect',
'source',
'stop',
'svg',
'td',
'th',
'track',
'use',
'video',
'wbr'

]);

View Source
// BlockElements lists the tag names treated as HTML block-level elements.
var BlockElements = []string{
	"div",
	"section",
	"article",
	"main",
	"aside",
	"header",
	"footer",
	"nav",
	"content",
}

BlockElements are HTML block-level elements. JavaScript original code: export const BLOCK_ELEMENTS = ['div', 'section', 'article', 'main', 'aside', 'header', 'footer', 'nav', 'content'];

View Source
// EntryPointElements lists the selectors used to find the main content.
var EntryPointElements = []string{
	`#post`,
	`.post-content`,
	`.article-content`,
	`#article-content`,
	`.article_post`,
	`.article-wrapper`,
	`.entry-content`,
	`.content-article`,
	`.post`,
	`.markdown-body`,
	`article`,
	`[role="article"]`,
	`main`,
	`[role="main"]`,
	// Ensures there is always a match.
	`body`,
}

EntryPointElements are the elements that will be used to find the main content. JavaScript original code: export const ENTRY_POINT_ELEMENTS = [

'#post',
'.post-content',
'.article-content',
'#article-content',
'.article_post',
'.article-wrapper',
'.entry-content',
'.content-article',
'.post',
'.markdown-body',
'article',
'[role="article"]',
'main',
'[role="main"]',
'body' // ensures there is always a match

];

View Source
// ExactSelectors are selectors to be removed exactly.
// NOTE(review): the element list is elided in this rendering (see the
// trailing comment); the literal shown here is not the full value.
var ExactSelectors = []string{}/* 144 elements not displayed */

ExactSelectors are selectors for elements to be removed on an exact match. JavaScript original code: (first part of EXACT_SELECTORS array)

View Source
// FootnoteInlineReferences lists the selectors for footnotes and citations.
// The per-entry notes are carried over from the original JavaScript list.
var FootnoteInlineReferences = []string{
	`sup.reference`,
	`cite.ltx_cite`,
	`sup[id^="fnr"]`,
	`span[id^="fnr"]`,
	`span[class*="footnote_ref"]`,
	`span.footnote-link`,
	`a.citation`,
	`a[id^="ref-link"]`,
	`a[href^="#fn"]`,
	`a[href^="#cite"]`,
	`a[href^="#reference"]`,
	`a[href^="#footnote"]`,
	// Common in academic papers.
	`a[href^="#r"]`,
	// Common for bibliography references.
	`a[href^="#b"]`,
	`a[href*="cite_note"]`,
	`a[href*="cite_ref"]`,
	// Substack.
	`a.footnote-anchor`,
	`span.footnote-hovercard-target a`,
	// Science.org.
	`a[role="doc-biblioref"]`,
	`a[id^="fnref"]`,
	// Nature.com. This selector also appears earlier in the list; the
	// repetition is present in the original JavaScript list as well.
	`a[id^="ref-link"]`,
}

FootnoteInlineReferences are selectors for inline footnote and citation references. JavaScript original code: export const FOOTNOTE_INLINE_REFERENCES = [

'sup.reference',
'cite.ltx_cite',
'sup[id^="fnr"]',
'span[id^="fnr"]',
'span[class*="footnote_ref"]',
'span.footnote-link',
'a.citation',
'a[id^="ref-link"]',
'a[href^="#fn"]',
'a[href^="#cite"]',
'a[href^="#reference"]',
'a[href^="#footnote"]',
'a[href^="#r"]', // Common in academic papers
'a[href^="#b"]', // Common for bibliography references
'a[href*="cite_note"]',
'a[href*="cite_ref"]',
'a.footnote-anchor', // Substack
'span.footnote-hovercard-target a', // Substack
'a[role="doc-biblioref"]', // Science.org
'a[id^="fnref"]',
'a[id^="ref-link"]', // Nature.com

].join(',');

View Source
// FootnoteListSelectors lists the selectors for footnote lists.
var FootnoteListSelectors = []string{
	`div.footnote ol`,
	`div.footnotes ol`,
	`div[role="doc-endnotes"]`,
	`div[role="doc-footnotes"]`,
	`ol.footnotes-list`,
	`ol.footnotes`,
	`ol.references`,
	`ol[class*="article-references"]`,
	`section.footnotes ol`,
	`section[role="doc-endnotes"]`,
	`section[role="doc-footnotes"]`,
	`section[role="doc-bibliography"]`,
	`ul.footnotes-list`,
	`ul.ltx_biblist`,
	// Substack.
	`div.footnote[data-component-name="FootnoteToDOM"]`,
}

FootnoteListSelectors are selectors for footnote lists. JavaScript original code: export const FOOTNOTE_LIST_SELECTORS = [

'div.footnote ol',
'div.footnotes ol',
'div[role="doc-endnotes"]',
'div[role="doc-footnotes"]',
'ol.footnotes-list',
'ol.footnotes',
'ol.references',
'ol[class*="article-references"]',
'section.footnotes ol',
'section[role="doc-endnotes"]',
'section[role="doc-footnotes"]',
'section[role="doc-bibliography"]',
'ul.footnotes-list',
'ul.ltx_biblist',
'div.footnote[data-component-name="FootnoteToDOM"]' // Substack

].join(',');

View Source
// InlineElements is the set of inline element names that should not be
// unwrapped. Every listed name maps to true.
var InlineElements = func() map[string]bool {
	names := []string{
		"a", "span", "strong", "em", "i", "b", "u", "code",
		"br", "small", "sub", "sup", "mark", "date", "del", "ins",
		"q", "abbr", "cite", "relative-time", "time", "font",
	}

	set := make(map[string]bool, len(names))
	for _, name := range names {
		set[name] = true
	}
	return set
}()

InlineElements are inline elements that should not be unwrapped. JavaScript original code: export const INLINE_ELEMENTS = new Set([

'a', 'span', 'strong', 'em', 'i', 'b', 'u', 'code', 'br', 'small',
'sub', 'sup', 'mark', 'date', 'del', 'ins', 'q', 'abbr', 'cite', 'relative-time', 'time',
'font'

]);

View Source
// PartialSelectors are removal patterns tested against element attributes.
// Matching is case insensitive and partial matches are allowed.
// NOTE(review): the element list is elided in this rendering (see the
// trailing comment); the literal shown here is not the full value.
var PartialSelectors = []string{}/* 475 elements not displayed */

PartialSelectors are removal patterns tested against the attributes listed in TestAttributes. Matching is case-insensitive, and partial matches are allowed. JavaScript original code: (first part of PARTIAL_SELECTORS array)

View Source
// PreserveElements is the set of element names that should not be unwrapped.
// Every listed name maps to true.
var PreserveElements = func() map[string]bool {
	names := []string{
		"pre", "code",
		"table", "thead", "tbody", "tr", "td", "th",
		"ul", "ol", "li",
		"dl", "dt", "dd",
		"figure", "figcaption", "picture",
		"details", "summary",
		"blockquote",
		"form", "fieldset",
	}

	set := make(map[string]bool, len(names))
	for _, name := range names {
		set[name] = true
	}
	return set
}()

PreserveElements are elements that should not be unwrapped. JavaScript original code: export const PRESERVE_ELEMENTS = new Set([

'pre', 'code', 'table', 'thead', 'tbody', 'tr', 'td', 'th',
'ul', 'ol', 'li', 'dl', 'dt', 'dd',
'figure', 'figcaption', 'picture',
'details', 'summary',
'blockquote',
'form', 'fieldset'

]);

View Source
// TestAttributes lists the attribute names to test against for partial
// matches.
var TestAttributes = []string{
	"class", "id",
	"data-test", "data-testid", "data-test-id",
	"data-qa", "data-cy",
}

TestAttributes are attributes to test against for partial matches. JavaScript original code: export const TEST_ATTRIBUTES = [

'class',
'id',
'data-test',
'data-testid',
'data-test-id',
'data-qa',
'data-cy'

];

Functions

func GetAllowedEmptyElements

func GetAllowedEmptyElements() []string

GetAllowedEmptyElements returns a slice of allowed empty element names

func GetBlockElements

func GetBlockElements() []string

GetBlockElements returns the block elements slice

func GetEntryPointElements

func GetEntryPointElements() []string

GetEntryPointElements returns the entry point elements slice

func GetExactSelectors

func GetExactSelectors() []string

GetExactSelectors returns the exact selectors slice

func GetFootnoteInlineReferences

func GetFootnoteInlineReferences() []string

GetFootnoteInlineReferences returns the footnote inline reference selectors

func GetFootnoteListSelectors

func GetFootnoteListSelectors() []string

GetFootnoteListSelectors returns the footnote list selectors

func GetInlineElements

func GetInlineElements() []string

GetInlineElements returns a slice of inline element names

func GetMobileWidth

func GetMobileWidth() int

GetMobileWidth returns the mobile width threshold

func GetPartialSelectors

func GetPartialSelectors() []string

GetPartialSelectors returns the partial selectors slice

func GetTestAttributes

func GetTestAttributes() []string

GetTestAttributes returns the test attributes slice

func IsAllowedAttribute

func IsAllowedAttribute(attrName string) bool

IsAllowedAttribute checks if an attribute is allowed

func IsAllowedAttributeDebug

func IsAllowedAttributeDebug(attrName string) bool

IsAllowedAttributeDebug checks if an attribute is allowed in debug mode

func IsAllowedEmptyElement

func IsAllowedEmptyElement(tagName string) bool

IsAllowedEmptyElement checks if an element is allowed to be empty

func IsInlineElement

func IsInlineElement(tagName string) bool

IsInlineElement checks if an element is inline

func IsPreserveElement

func IsPreserveElement(tagName string) bool

IsPreserveElement checks if an element should be preserved

Types

This section is empty.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL