Documentation
¶
Index ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type PageHTMLRow ¶
PageHTMLRow mirrors storage.PageHTMLRow to avoid circular imports.
type PageTestResult ¶
// PageTestResult holds the test results for a single page.
type PageTestResult struct {
// URL of the page these results belong to.
URL string `json:"url"`
// Results is keyed by rule ID; each value is that rule's outcome on this page.
Results map[string]string `json:"results"` // rule_id → "pass"/"fail"/extracted_value
}
PageTestResult holds the test results for a single page.
type RuleType ¶
// RuleType defines the kind of test rule. Its values are the string
// constants declared for this type (e.g. "string_contains").
type RuleType string
RuleType defines the kind of test rule.
const ( StringContains RuleType = "string_contains" StringNotContains RuleType = "string_not_contains" RegexMatch RuleType = "regex_match" RegexNotMatch RuleType = "regex_not_match" HeaderExists RuleType = "header_exists" HeaderNotExists RuleType = "header_not_exists" HeaderContains RuleType = "header_contains" HeaderRegex RuleType = "header_regex" CSSExists RuleType = "css_exists" CSSNotExists RuleType = "css_not_exists" CSSExtractText RuleType = "css_extract_text" CSSExtractAttr RuleType = "css_extract_attr" CSSExtractAllText RuleType = "css_extract_all_text" CSSExtractAllAttr RuleType = "css_extract_all_attr" RegexExtract RuleType = "regex_extract" RegexExtractAll RuleType = "regex_extract_all" XPathExtract RuleType = "xpath_extract" XPathExtractAll RuleType = "xpath_extract_all" )
func (RuleType) IsClickHouseNative ¶
IsClickHouseNative reports whether the rule can run as a ClickHouse SQL expression.
type Ruleset ¶
// Ruleset groups test rules under a named set.
type Ruleset struct {
// ID is the ruleset's identifier.
ID string `json:"id"`
// Name is the name the set is grouped under.
Name string `json:"name"`
// CreatedAt and UpdatedAt are the ruleset's timestamps.
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
// Rules are the test rules belonging to this set.
Rules []TestRule `json:"rules"`
// RuleCount is omitted from the JSON output when zero (omitempty).
// NOTE(review): presumably populated when Rules itself is not loaded
// (e.g. list views) — confirm against the storage layer.
RuleCount int `json:"rule_count,omitempty"`
}
Ruleset groups test rules under a named set.
type StorageInterface ¶
// StorageInterface is the subset of storage.Store needed by the engine.
type StorageInterface interface {
// RunCustomTestsSQL runs the given rules for a session and returns a
// two-level string map of results.
// NOTE(review): the map is presumably page URL → rule ID → result, and the
// rules are presumably those for which IsClickHouseNative is true — confirm.
RunCustomTestsSQL(ctx context.Context, sessionID string, rules []TestRule) (map[string]map[string]string, error)
// StreamPagesHTML returns a receive-only channel of PageHTMLRow values for
// the session.
// NOTE(review): channel close and cancellation semantics are not visible
// here — confirm with the implementation.
StreamPagesHTML(ctx context.Context, sessionID string) (<-chan PageHTMLRow, error)
}
StorageInterface is the subset of storage.Store needed by the engine.
type TestRule ¶
// TestRule is a single test rule within a ruleset.
type TestRule struct {
// ID is the rule's identifier.
ID string `json:"id"`
// RulesetID is the ID of the ruleset this rule belongs to.
RulesetID string `json:"ruleset_id"`
// Type is the kind of rule (a RuleType value).
Type RuleType `json:"type"`
// Name is the rule's name.
Name string `json:"name"`
// Value and Extra carry the rule's parameters.
// NOTE(review): their meaning presumably depends on Type (e.g. a selector
// in Value and an attribute name in Extra) — confirm against the engine.
Value string `json:"value"`
Extra string `json:"extra"`
// SortOrder is the rule's position used for ordering.
// NOTE(review): presumably ascending within its ruleset — confirm.
SortOrder int `json:"sort_order"`
}
TestRule is a single test rule within a ruleset.
type TestRunResult ¶
// TestRunResult is the full output of running a ruleset against a session.
type TestRunResult struct {
// RulesetID and RulesetName identify the ruleset that was run.
RulesetID string `json:"ruleset_id"`
RulesetName string `json:"ruleset_name"`
// SessionID identifies the session the ruleset was run against.
SessionID string `json:"session_id"`
// TotalPages is the page count for the run.
// NOTE(review): presumably equal to len(Pages) — confirm.
TotalPages int `json:"total_pages"`
// Rules are the rules reported with this run.
Rules []TestRule `json:"rules"`
// Pages holds one PageTestResult per page.
Pages []PageTestResult `json:"pages"`
// Summary is keyed by rule ID and counts the passes for that rule.
Summary map[string]int `json:"summary"` // rule_id → count of passes
}
TestRunResult is the full output of running a ruleset against a session.
func RunTests ¶
// RunTests executes all rules from a ruleset against a crawl session.
//
// store provides the storage operations the engine needs, sessionID names the
// crawl session, and ruleset supplies the rules. It returns a *TestRunResult
// and an error.
// NOTE(review): handling of a nil ruleset or an empty rule list is not visible
// from the signature — confirm against the implementation.
func RunTests(ctx context.Context, store StorageInterface, sessionID string, ruleset *Ruleset) (*TestRunResult, error)
RunTests executes all rules from a ruleset against a crawl session.
Click to show internal directories.
Click to hide internal directories.