Documentation
¶
Index ¶
- Constants
- func ExtractSingle(p *Parser, field PatternField) (any, error)
- func ExtractWithPatterns(p *Parser, patterns []PatternField) ([]map[string]any, error)
- func FromHTML(html string) *builder.Builder
- func FromURL(url string) *builder.Builder
- func ListPipes() []string
- func New() *builder.Builder
- func RegisterPipe(name string, factory func() Pipe)
- func ToJSON(html string) ([]byte, error)
- func ToJSONWithOptions(html string, opts JSONOptions) ([]byte, error)
- func URLToJSON(url string, parseOpts []Option, jsonOpts JSONOptions) ([]byte, error)
- type Extractor
- type HealthResult
- type JSONOptions
- type MultipleType
- type Option
- func WithDisableRandomUA() Option
- func WithHeaders(h map[string]string) Option
- func WithMaxRetries(maxRetries int) Option
- func WithProxy(proxyURL string) Option
- func WithRandomUserAgent() Option
- func WithSuppressErrors() Option
- func WithTimeout(d time.Duration) Option
- func WithUserAgent(ua string) Option
- type ParseError
- type Parser
- type PatternField
- type PatternMeta
- type Pipe
- func CreatePipe(name string) (Pipe, error)
- func NewDateFormatPipe(format string) Pipe
- func NewDecodePipe() Pipe
- func NewExtractEmailPipe() Pipe
- func NewLowerCasePipe() Pipe
- func NewNumberNormalizePipe() Pipe
- func NewReplacePipe(pattern, with string) Pipe
- func NewTrimPipe() Pipe
- func NewURLResolvePipe(baseURL string) Pipe
- func NewUpperCasePipe() Pipe
- type ReturnType
- type Selection
- type ValidationResult
Constants ¶
const (
	// ReturnTypeText returns plain text content.
	ReturnTypeText = parser.ReturnTypeText
	// ReturnTypeHTML returns HTML content.
	ReturnTypeHTML = parser.ReturnTypeHTML
)
const (
	// MultipleNone returns only the first match.
	MultipleNone = pattern.MultipleNone
	// MultipleArray returns all matches as an array.
	MultipleArray = pattern.MultipleArray
	// MultipleSpace returns all matches joined with spaces.
	MultipleSpace = pattern.MultipleSpace
	// MultipleComma returns all matches joined with commas.
	MultipleComma = pattern.MultipleComma
)
Multiple type constants.
Variables ¶
This section is empty.
Functions ¶
func ExtractSingle ¶
func ExtractSingle(p *Parser, field PatternField) (any, error)
ExtractSingle extracts a single field using a pattern.
func ExtractWithPatterns ¶
func ExtractWithPatterns(p *Parser, patterns []PatternField) ([]map[string]any, error)
ExtractWithPatterns extracts data using pattern fields.
func RegisterPipe ¶
RegisterPipe registers a custom pipe factory.
func ToJSONWithOptions ¶
func ToJSONWithOptions(html string, opts JSONOptions) ([]byte, error)
ToJSONWithOptions converts an HTML string to JSON with custom options.
Types ¶
type Extractor ¶
Extractor is an alias for internal pattern.Extractor.
func NewExtractor ¶
NewExtractor creates a new Extractor from a Parser.
type HealthResult ¶
type HealthResult = health.HealthResult
HealthResult is an alias for internal health.HealthResult.
func CheckURLHealth ¶
func CheckURLHealth(urls []string, timeout time.Duration) []HealthResult
CheckURLHealth checks the health of URLs concurrently.
func CheckURLHealthSequential ¶
func CheckURLHealthSequential(urls []string, timeout time.Duration) []HealthResult
CheckURLHealthSequential checks URLs sequentially.
func CheckURLHealthWithGet ¶
func CheckURLHealthWithGet(urls []string, timeout time.Duration) []HealthResult
CheckURLHealthWithGet checks URL health using GET requests.
type JSONOptions ¶
type JSONOptions = parser.JSONOptions
JSONOptions is an alias for internal parser.JSONOptions.
func DefaultJSONOptions ¶
func DefaultJSONOptions() JSONOptions
DefaultJSONOptions returns the default JSON conversion options.
type MultipleType ¶
type MultipleType = pattern.MultipleType
MultipleType is an alias for internal pattern.MultipleType.
type Option ¶
type Option func(*config)
Option is a function that configures parsing behavior.
func WithDisableRandomUA ¶
func WithDisableRandomUA() Option
WithDisableRandomUA disables random user agents and uses a static user agent.
func WithHeaders ¶
WithHeaders sets custom HTTP headers.
func WithMaxRetries ¶
WithMaxRetries sets the maximum number of retries for failed HTTP requests.
func WithRandomUserAgent ¶
func WithRandomUserAgent() Option
WithRandomUserAgent explicitly enables random user agents (enabled by default).
func WithSuppressErrors ¶
func WithSuppressErrors() Option
WithSuppressErrors enables error suppression for XPath queries. When enabled, XPath errors return nil instead of error values.
func WithTimeout ¶
WithTimeout sets the HTTP request timeout.
type ParseError ¶
ParseError represents an error that occurred during parsing.
func NewParseError ¶
func NewParseError(message string, err error) *ParseError
NewParseError creates a new ParseError.
func (*ParseError) Unwrap ¶
func (e *ParseError) Unwrap() error
Unwrap returns the underlying error.
type Parser ¶
Parser is an alias for internal parser.Parser.
type PatternField ¶
type PatternField = pattern.PatternField
PatternField is an alias for internal pattern.PatternField.
func NewContainerPattern ¶
func NewContainerPattern(key string, xpath string) PatternField
NewContainerPattern creates a new container PatternField.
func NewPatternField ¶
func NewPatternField(key string, xpath string) PatternField
NewPatternField creates a new PatternField.
func NewPatternFieldWithHTML ¶
func NewPatternFieldWithHTML(key string, xpath string) PatternField
NewPatternFieldWithHTML creates a new PatternField that returns HTML content.
func NewPatternFieldWithMultiple ¶
func NewPatternFieldWithMultiple(key string, xpath string, multiple MultipleType) PatternField
NewPatternFieldWithMultiple creates a new PatternField with multiple value handling.
type PatternMeta ¶
type PatternMeta = pattern.PatternMeta
PatternMeta is an alias for internal pattern.PatternMeta.
func DefaultPatternMeta ¶
func DefaultPatternMeta() *PatternMeta
DefaultPatternMeta returns default pattern metadata.
type Pipe ¶
Pipe is an alias for internal pipe.Pipe.
func NewDateFormatPipe ¶
NewDateFormatPipe creates a new DateFormatPipe.
func NewExtractEmailPipe ¶
func NewExtractEmailPipe() Pipe
NewExtractEmailPipe creates a new ExtractEmailPipe.
func NewNumberNormalizePipe ¶
func NewNumberNormalizePipe() Pipe
NewNumberNormalizePipe creates a new NumberNormalizePipe.
func NewReplacePipe ¶
NewReplacePipe creates a new ReplacePipe.
func NewURLResolvePipe ¶
NewURLResolvePipe creates a new URLResolvePipe.
type ReturnType ¶
type ReturnType = parser.ReturnType
ReturnType is an alias for internal parser.ReturnType.
type ValidationResult ¶
type ValidationResult = validator.ValidationResult
ValidationResult is an alias for internal validator.ValidationResult.
func ValidateXPath ¶
func ValidateXPath(html string, xpaths []string, suppressErrors bool) []ValidationResult
ValidateXPath validates XPath expressions against HTML.
func ValidateXPathWithParser ¶
func ValidateXPathWithParser(p *Parser, xpaths []string) []ValidationResult
ValidateXPathWithParser validates XPath using an existing Parser.