Documentation
¶
Index ¶
- Constants
- Variables
- func ApplyColorScheme(page *rod.Page, scheme string) error
- func ApplyDevice(page *rod.Page, d Device) error
- func ApplyProxyAuth(page *rod.Page, proxyURL string) error
- func ApplyStealth(page *rod.Page) error
- func ApplyTimezone(page *rod.Page, tz string) error
- func ApplyUserAgent(page *rod.Page, ua string) error
- func ClickElement(page *rod.Page, el *rod.Element) error
- func ClickRef(page *rod.Page, ref string, snapshot *PageSnapshot) error
- func CloseTab(browser *rod.Browser, index int) (proto.TargetTargetID, error)
- func CountSelector(page *rod.Page, selector string) int
- func DecryptCookiesInPlace(cookiesPath string) (total, decrypted, failed int, err error)
- func DefaultExtensionPaths() ([]string, error)
- func DefaultExtensionsDir() (string, error)
- func DetectAgent() bool
- func DismissCookieBanner(page *rod.Page) bool
- func EvalJS(page *rod.Page, expr string, elementRef string, snapshot *PageSnapshot) (string, error)
- func FindSystemChromeBinary() string
- func FormatCollect(r *CollectResult) string
- func FormatDiff(d SnapshotDiff) string
- func FormatErrors(errors []ErrorEntry) string
- func FormatMultiCollect(r *MultiCollectResult) string
- func FormatPreview(r *PreviewResult) string
- func FormatPreviewProfile(r *PreviewResult, p RenderProfile) string
- func FormatText(result *ExtractionResult) string
- func FormatTextProfile(result *ExtractionResult, p RenderProfile) string
- func HasSelector(page *rod.Page, selector string) bool
- func HoverElement(page *rod.Page, el *rod.Element) error
- func HoverRef(page *rod.Page, ref string, snapshot *PageSnapshot) error
- func HumanMode() bool
- func InjectCookies(page *rod.Page, cookies []CookieRecord) (int, error)
- func LoadFulfillBody(value string) ([]byte, error)
- func LoadStorageState(browser *rod.Browser, page *rod.Page, state *StorageState) error
- func NewLauncher(opts LauncherOpts) *launcher.Launcher
- func ParseBlockList(s string) []string
- func PressKey(page *rod.Page, key string, ref string, snapshot *PageSnapshot) error
- func ProfileUsesSystemChrome(userDataDir string) bool
- func ResolveByLocator(page *rod.Page, loc Locator) (*rod.Element, error)
- func ResolveProfileDir(name string) (string, error)
- func ResolveRef(page *rod.Page, ref string, snapshot *PageSnapshot) (*rod.Element, error)
- func SaveCookiesJSON(profileDir string, cookies []CookieRecord) error
- func ScrollBy(page *rod.Page, dy int) (int, error)
- func ScrollToRef(page *rod.Page, ref string, snapshot *PageSnapshot) error
- func ScrollToY(page *rod.Page, y int, bottomSentinel bool) (int, error)
- func SelectOption(page *rod.Page, ref string, values []string, snapshot *PageSnapshot) error
- func SetHumanMode(enabled bool)
- func SetViewport(page *rod.Page, width, height int) error
- func SwitchTab(browser *rod.Browser, index int) (*rod.Page, error)
- func TakeScreenshot(page *rod.Page, fullPage bool, elementRef string, quality int, ...) ([]byte, error)
- func TruncateURL(u string, maxLen int) string
- func TypeElement(page *rod.Page, el *rod.Element, text string) error
- func TypeRef(page *rod.Page, ref string, text string, snapshot *PageSnapshot) error
- func UploadRef(page *rod.Page, ref string, selector string, files []string, ...) error
- func ValidateExtractLevel(level ExtractLevel) error
- func WaitForBotChallenge(page *rod.Page, timeout time.Duration) bool
- func WaitForPage(page *rod.Page, waitStrategy string) error
- func WaitForSelector(page *rod.Page, selector string, timeoutSec int) error
- func WriteHAR(h *HAR, path string) error
- type Browser
- func (b *Browser) Close()
- func (b *Browser) Connected() bool
- func (b *Browser) CurrentTargetID() string
- func (b *Browser) DeleteSnapshot(targetID proto.TargetTargetID) error
- func (b *Browser) Page() (*rod.Page, error)
- func (b *Browser) RodBrowser() *rod.Browser
- func (b *Browser) SaveSnapshot(page *rod.Page, result *ExtractionResult) error
- func (b *Browser) SetCurrentPage(page *rod.Page) error
- func (b *Browser) Snapshot(page *rod.Page) *PageSnapshot
- type BrowserOpts
- type CaptureSession
- type CaptureSpec
- type CapturedEntry
- type CollectResult
- type CollectedItem
- type CookieRecord
- type Device
- type DialogResult
- type DiffEntry
- type DiffNode
- type DiffStats
- type ErrorCollector
- type ErrorEntry
- type ExtractLevel
- type ExtractedNode
- type ExtractionResult
- type ExtractionStats
- type HAR
- type HARContent
- type HARCreator
- type HAREntry
- type HARLog
- type HARPage
- type HARRequest
- type HARResponse
- type HARTimings
- type ImageDiffResult
- type InterceptSession
- type InterceptSpec
- type InterceptStats
- type LauncherOpts
- type Locator
- type MultiCollectResult
- type NetworkEntry
- type PageInfo
- type PageSnapshot
- type PreviewResult
- type PreviewSummary
- type RefSnapshot
- type RenderProfile
- type SiteResult
- type SnapshotDiff
- type StorageCookie
- type StorageKeyValue
- type StorageOrigin
- type StorageState
- type TabInfo
Constants ¶
const CookiesJSONFilename = ".ghostchrome-cookies.json"
CookiesJSONFilename is the filename written by import-profile inside the ghostchrome profile dir. openPage() reads this on startup and replays the cookies into Chrome via CDP Network.setCookies.
Variables ¶
var DefaultExtensionNames = []string{"ublock", "icdc", "force-bg"}
DefaultExtensionNames lists the bundled extension slugs ghostchrome looks for under ~/.ghostchrome/extensions/<name>/ when --default-extensions is set. Mirrors browser-use's defaults.
var ErrStaleRef = errors.New("stale ref: snapshot is missing or no longer matches the page")
ErrStaleRef indicates that a ref no longer maps to a live element.
Functions ¶
func ApplyColorScheme ¶ added in v0.4.0
ApplyColorScheme emulates prefers-color-scheme. Accepts "dark", "light", "no-preference" (case-insensitive).
func ApplyDevice ¶ added in v0.4.0
ApplyDevice applies viewport metrics, UA, and touch emulation from a preset.
func ApplyProxyAuth ¶ added in v0.7.0
ApplyProxyAuth wires CDP Fetch interception on the page so that basic-auth challenges issued by an upstream proxy (e.g. Bright Data, Smartproxy) are answered automatically with the credentials embedded in proxyURL.
Chromium ignores user:password embedded in --proxy-server URLs and instead surfaces a blocking basic-auth dialog. The work-around is to enable Fetch.enable{handleAuthRequests:true} and respond to Fetch.authRequired via Fetch.continueWithAuth.
If proxyURL is empty or carries no credentials, this is a silent no-op.
func ApplyStealth ¶
ApplyStealth applies anti-detection patches to a page via CDP. Targets DataDome, Akamai, and similar bot-detection systems.
func ApplyTimezone ¶ added in v0.4.0
ApplyTimezone overrides the JavaScript Date/Intl timezone.
func ApplyUserAgent ¶ added in v0.4.0
ApplyUserAgent overrides navigator.userAgent and the HTTP User-Agent header.
func ClickElement ¶ added in v0.6.0
ClickElement performs a click on an already-resolved element (used by the locator path so the same scroll+click+wait logic is shared).
func ClickRef ¶
func ClickRef(page *rod.Page, ref string, snapshot *PageSnapshot) error
ClickRef clicks the element at the given ref.
func CountSelector ¶ added in v0.4.0
CountSelector returns the number of elements matching selector. Errors are swallowed and reported as 0 so the polling caller can distinguish "not-yet-rendered" from "match count below threshold".
func DecryptCookiesInPlace ¶ added in v0.7.0
DecryptCookiesInPlace opens the SQLite Cookies database at cookiesPath, decrypts every v10-tagged encrypted_value using the macOS Keychain Chrome key, and rewrites the row with the plaintext in `value` (encrypted_value cleared). Kept for backward compatibility with profiles imported before the JSON export scheme. Returns counts: total, decrypted, failed.
func DefaultExtensionPaths ¶ added in v0.7.0
DefaultExtensionPaths returns absolute paths to the bundled extensions (uBlock Origin Lite, "I still don't care about cookies", Force Background Tab) that exist on disk. Missing entries are silently skipped. When none are found, a hint is printed to stderr.
func DefaultExtensionsDir ¶ added in v0.7.0
DefaultExtensionsDir returns the absolute path to the per-user extensions directory: ~/.ghostchrome/extensions.
func DetectAgent ¶ added in v0.2.0
func DetectAgent() bool
DetectAgent reports whether the current process runs inside an LLM agent. Detection order:
- GHOSTCHROME_PROFILE=agent|human (explicit override).
- GHOSTCHROME_AGENT=1|true (explicit opt-in).
- Known agent environment variables set by Claude Code, Cursor, Aider, Devin, Gemini CLI, and similar tools.
func DismissCookieBanner ¶
DismissCookieBanner attempts to find and click a cookie accept button. Returns true if a banner was found and dismissed.
func EvalJS ¶
EvalJS evaluates JavaScript on the page or in an element context. If elementRef is non-empty, the JS runs with `this` bound to that element.
func FindSystemChromeBinary ¶ added in v0.7.0
func FindSystemChromeBinary() string
FindSystemChromeBinary returns the absolute path to a Google Chrome binary matching the com.google.Chrome bundle id, or "" if none found. Used when the imported profile requires keychain-bound cookie decryption.
func FormatCollect ¶ added in v0.2.0
func FormatCollect(r *CollectResult) string
FormatCollect renders a compact text table from collected items.
func FormatDiff ¶ added in v0.2.0
func FormatDiff(d SnapshotDiff) string
FormatDiff renders a SnapshotDiff as compact text.
func FormatErrors ¶
func FormatErrors(errors []ErrorEntry) string
FormatErrors formats errors as compact text lines.
func FormatMultiCollect ¶ added in v0.2.0
func FormatMultiCollect(r *MultiCollectResult) string
FormatMultiCollect renders a compact text report for multi-URL results.
func FormatPreview ¶
func FormatPreview(r *PreviewResult) string
FormatPreview renders a compact text report (human profile).
func FormatPreviewProfile ¶ added in v0.2.0
func FormatPreviewProfile(r *PreviewResult, p RenderProfile) string
FormatPreviewProfile renders the preview using the given profile. In agent mode, empty sections and zero-stat headers are dropped, failed requests are grouped by status code and the DOM dump uses one-letter role tags.
func FormatText ¶
func FormatText(result *ExtractionResult) string
FormatText renders the extraction result as compact text (human profile).
func FormatTextProfile ¶ added in v0.2.0
func FormatTextProfile(result *ExtractionResult, p RenderProfile) string
FormatTextProfile renders the extraction result using the given profile. The agent profile uses one-letter role tags, truncates long labels and shortens hrefs (see TruncateURL).
func HasSelector ¶ added in v0.4.0
HasSelector reports whether at least one element matches selector. It is a non-throwing alternative to page.Element used by polling loops.
func HoverElement ¶ added in v0.6.0
HoverElement hovers on an already-resolved element.
func HoverRef ¶
func HoverRef(page *rod.Page, ref string, snapshot *PageSnapshot) error
HoverRef hovers over the element at the given ref.
func InjectCookies ¶ added in v0.7.0
func InjectCookies(page *rod.Page, cookies []CookieRecord) (int, error)
InjectCookies sets the given cookies on the browser via CDP, one at a time. A bad cookie (binary garbage from a misdecoded entry, mismatched domain, etc.) doesn't poison the rest of the batch. Returns the number of successful injections.
func LoadFulfillBody ¶ added in v0.5.0
LoadFulfillBody returns the raw bytes read from the file when value is an @path literal, and the bytes of value itself otherwise. This lets CLI flags accept either `"@mock.json"` or an inline payload.
func LoadStorageState ¶ added in v0.3.0
LoadStorageState restores cookies browser-wide and localStorage per origin. For each origin referenced, the current page is navigated there briefly so the localStorage write lands in the right context.
func NewLauncher ¶ added in v0.2.0
func NewLauncher(opts LauncherOpts) *launcher.Launcher
NewLauncher returns a configured launcher with the shared anti-detection flags used by both auto-launch (NewBrowser) and the `serve` command. --no-sandbox is auto-enabled when running inside a CI runner (env GITHUB_ACTIONS / CI) or as root, because those environments disable the Chrome sandbox.
func ParseBlockList ¶ added in v0.5.0
ParseBlockList splits a comma-separated glob list, trimming spaces and dropping empty entries.
func PressKey ¶
PressKey sends a keyboard key press. If ref is non-empty, focuses the element first.
func ProfileUsesSystemChrome ¶ added in v0.7.0
ProfileUsesSystemChrome returns true when the profile dir was created by `ghostchrome import-profile` and is bound to the system Chrome binary. It is kept for backward compatibility, but it is no longer required now that the cookie decryption path exists: the bundled Chromium can read the imported cookies because they were rewritten as plaintext.
func ResolveByLocator ¶ added in v0.6.0
ResolveByLocator returns the first element matching the locator. Matching strategy:
- If Text is set: XPath text-contains match (case-insensitive). Matches <button>, <a>, <label>, and generic text containers.
- Else: extract the a11y tree at skeleton level, filter by (role, name|label) and return the first hit.
Snapshot is updated as a side-effect only for the Text branch when a new Extract is triggered.
func ResolveProfileDir ¶ added in v0.7.0
ResolveProfileDir converts a short profile name into the canonical persistent Chrome user_data_dir path under ~/.ghostchrome/profiles/<name>. The directory is created with 0700 perms if missing.
func ResolveRef ¶
ResolveRef finds an element by its ref (@1, @2, etc.) using a persisted snapshot.
func SaveCookiesJSON ¶ added in v0.7.0
func SaveCookiesJSON(profileDir string, cookies []CookieRecord) error
SaveCookiesJSON writes a portable cookie snapshot inside the profile dir.
func ScrollToRef ¶ added in v0.4.0
func ScrollToRef(page *rod.Page, ref string, snapshot *PageSnapshot) error
ScrollToRef scrolls the element at the given ref into view without performing any other interaction.
func ScrollToY ¶ added in v0.4.0
ScrollToY scrolls the page to an absolute Y pixel position. When bottomSentinel is true, the page is scrolled to document.body.scrollHeight regardless of the y argument — use this for "scroll-to bottom". Returns the final window.scrollY as observed after the scroll.
func SelectOption ¶
SelectOption selects option(s) in a <select> element by visible text.
func SetHumanMode ¶ added in v0.7.0
func SetHumanMode(enabled bool)
SetHumanMode toggles the human-input simulation globally. When false the engine uses the original Rod fast path; when true ClickElement / TypeElement / HoverElement dispatch human-shaped events.
func SetViewport ¶
SetViewport overrides the page viewport dimensions.
func TakeScreenshot ¶
func TakeScreenshot(page *rod.Page, fullPage bool, elementRef string, quality int, snapshot *PageSnapshot) ([]byte, error)
TakeScreenshot captures the page or a specific element. If elementRef is non-empty, captures only that element. If fullPage is true, captures the full scrollable page. quality controls JPEG quality (1-100); PNG is used if quality <= 0.
func TruncateURL ¶ added in v0.2.0
TruncateURL strips common scheme/www prefixes and shortens the URL to maxLen. Used by formatted output from preview and collect.
func TypeElement ¶ added in v0.6.0
TypeElement writes text into an already-resolved element.
func TypeRef ¶
TypeRef types text into the element at the given ref. Uses focus + select all + keyboard typing to work with React/Vue/Angular.
func UploadRef ¶ added in v0.3.0
func UploadRef(page *rod.Page, ref string, selector string, files []string, snapshot *PageSnapshot) error
UploadRef sets the files on a file-input element.
The target can be identified either by:
- ref: a @N reference from the current snapshot (works when the input is a native, visible <input type=file>).
- selector: a CSS selector (use this when the visible widget is a styled button wrapping a hidden input — common pattern).
Exactly one of ref or selector must be non-empty.
func ValidateExtractLevel ¶ added in v0.2.0
func ValidateExtractLevel(level ExtractLevel) error
ValidateExtractLevel ensures the extraction level is supported.
func WaitForBotChallenge ¶ added in v0.2.0
WaitForBotChallenge detects bot-challenge pages (DataDome, Cloudflare, etc.) and waits for the challenge JS to resolve and the page to reload. Returns true if a challenge was detected and resolved.
func WaitForPage ¶ added in v0.2.0
WaitForPage applies a supported page wait strategy.
func WaitForSelector ¶
WaitForSelector waits for a CSS selector to appear in the DOM.
Types ¶
type Browser ¶
type Browser struct {
// contains filtered or unexported fields
}
Browser wraps a Rod browser with connect/launch logic.
func NewBrowser ¶
NewBrowser creates a browser instance. If connectURL is set, connects to an existing Chrome via CDP. Otherwise, auto-launches a new Chrome process.
func NewBrowserWith ¶ added in v0.7.0
func NewBrowserWith(opts BrowserOpts) (*Browser, error)
NewBrowserWith creates a browser instance with full options. Persistent user_data_dir and upstream proxy only apply in auto-launch mode (i.e., when opts.ConnectURL is empty).
func (*Browser) Close ¶
func (b *Browser) Close()
Close cleans up the browser resources. External Chrome keeps running; the CLI process owns the websocket lifetime.
func (*Browser) Connected ¶
Connected returns true if connected to external Chrome (not launched by us).
func (*Browser) CurrentTargetID ¶ added in v0.2.0
CurrentTargetID returns the persisted current tab target, if any.
func (*Browser) DeleteSnapshot ¶ added in v0.2.0
func (b *Browser) DeleteSnapshot(targetID proto.TargetTargetID) error
DeleteSnapshot removes stored ref state for a closed page target.
func (*Browser) Page ¶
Page returns the active page or creates a new one. When connected to an existing Chrome, it prefers the persisted active tab.
func (*Browser) RodBrowser ¶ added in v0.2.0
RodBrowser returns the underlying rod.Browser for advanced operations.
func (*Browser) SaveSnapshot ¶ added in v0.2.0
func (b *Browser) SaveSnapshot(page *rod.Page, result *ExtractionResult) error
SaveSnapshot persists the latest ref snapshot for the page.
func (*Browser) SetCurrentPage ¶ added in v0.2.0
SetCurrentPage marks the provided page as the current tab for the session.
type BrowserOpts ¶ added in v0.7.0
type BrowserOpts struct {
ConnectURL string
Headless bool
Invisible bool
TimeoutSec int
UserDataDir string // absolute path; ignored when ConnectURL is set
Proxy string // upstream proxy URL; ignored when ConnectURL is set
Extensions []string // absolute paths to unpacked extensions; ignored when ConnectURL is set
SystemChrome bool // force /Applications/Google Chrome.app binary; ignored when ConnectURL is set
}
BrowserOpts configures NewBrowserWith. It supersedes the positional parameters of NewBrowser and adds persistent profile + upstream proxy support for auto-launch.
type CaptureSession ¶ added in v0.7.0
type CaptureSession struct {
// contains filtered or unexported fields
}
CaptureSession owns the goroutine that listens to Network events.
func StartCapture ¶ added in v0.7.0
func StartCapture(page *rod.Page, spec CaptureSpec) (*CaptureSession, error)
StartCapture enables the Network domain and begins listening.
func (*CaptureSession) Entries ¶ added in v0.7.0
func (s *CaptureSession) Entries() []*CapturedEntry
Entries returns a snapshot of collected entries.
func (*CaptureSession) ReachedMax ¶ added in v0.7.0
func (s *CaptureSession) ReachedMax() <-chan struct{}
ReachedMax fires once Max matches have been collected.
func (*CaptureSession) Stop ¶ added in v0.7.0
func (s *CaptureSession) Stop() ([]*CapturedEntry, error)
Stop detaches listeners, closes the output file, and returns the captured entries together with any write error.
type CaptureSpec ¶ added in v0.7.0
type CaptureSpec struct {
// URLMatch is a regex applied to the full request URL. Empty = match all.
URLMatch string
// MimeMatch is a regex applied to the response MIME type. Empty = match all.
MimeMatch string
// Max is the number of MATCHING entries to collect before ReachedMax fires.
// 0 = unlimited.
Max int
// IncludeBody, when true, fetches the response body via
// Network.getResponseBody for every matching entry.
IncludeBody bool
// OutputPath, if set, streams each entry as it is captured (NDJSON).
OutputPath string
}
CaptureSpec configures a passive network capture session. It is a generic recorder in the style of the DevTools Network tab: no request modification, no auth bypass — just observation of what the page already requested.
type CapturedEntry ¶ added in v0.7.0
type CapturedEntry struct {
RequestID string `json:"request_id"`
Method string `json:"method"`
URL string `json:"url"`
ResourceType string `json:"resource_type"`
Status int `json:"status"`
StatusText string `json:"status_text,omitempty"`
MimeType string `json:"mime_type,omitempty"`
ReqHeaders map[string]string `json:"request_headers,omitempty"`
ResHeaders map[string]string `json:"response_headers,omitempty"`
PostData string `json:"post_data,omitempty"`
Body string `json:"body,omitempty"`
BodyBase64 bool `json:"body_base64,omitempty"`
BodyError string `json:"body_error,omitempty"`
StartedAt string `json:"started_at"`
}
CapturedEntry is one fully-hydrated request/response pair.
type CollectResult ¶ added in v0.2.0
type CollectResult struct {
PageURL string `json:"page_url"`
ItemCount int `json:"item_count"`
Items []CollectedItem `json:"items"`
}
CollectResult holds all collected items from a listing page.
func Collect ¶ added in v0.2.0
func Collect(page *rod.Page, limit int) (*CollectResult, error)
Collect auto-detects repeated listing cards on a page and extracts structured data. It finds elements containing price patterns (€, $, £), groups them by common ancestor, and extracts title, price, URL, and metadata from each card.
type CollectedItem ¶ added in v0.2.0
type CollectedItem struct {
Title string `json:"title"`
Price string `json:"price,omitempty"`
URL string `json:"url,omitempty"`
Fields map[string]string `json:"fields,omitempty"`
}
CollectedItem is a single listing item extracted from a page.
type CookieRecord ¶ added in v0.7.0
type CookieRecord struct {
Name string `json:"name"`
Value string `json:"value"`
Domain string `json:"domain"`
Path string `json:"path"`
Expires float64 `json:"expires"` // CDP unix seconds, 0 = session
Secure bool `json:"secure"`
HTTPOnly bool `json:"httpOnly"`
SameSite string `json:"sameSite,omitempty"` // "Strict"|"Lax"|"None"|""
}
CookieRecord holds the subset of fields ghostchrome injects via CDP at session start. Mirrors the relevant columns of Chrome's Cookies SQLite schema, plus the decrypted plaintext value.
func ExportDecryptedCookies ¶ added in v0.7.0
func ExportDecryptedCookies(cookiesPath string) ([]CookieRecord, error)
ExportDecryptedCookies opens the SQLite Cookies db at cookiesPath, decrypts every v10-tagged encrypted_value via macOS Keychain, and returns the list as portable CookieRecord values. The source file is not modified.
func LoadCookiesJSON ¶ added in v0.7.0
func LoadCookiesJSON(profileDir string) ([]CookieRecord, error)
LoadCookiesJSON reads the snapshot if present. Returns nil, nil when the file is absent (no import done on this profile).
type Device ¶ added in v0.4.0
type Device struct {
Name string
Width int
Height int
DPR float64
UserAgent string
Mobile bool
Touch bool
}
Device describes a hardware profile used by the emulate command. Dimensions are CSS pixels; DPR is devicePixelRatio.
func DeviceByName ¶ added in v0.4.0
DeviceByName looks up a Device preset by its canonical name.
func ListDevices ¶ added in v0.4.0
func ListDevices() []Device
ListDevices returns a copy of the registered device presets.
type DialogResult ¶ added in v0.2.0
type DialogResult struct {
Handled bool `json:"handled"`
Action string `json:"action"`
Type string `json:"type,omitempty"`
Message string `json:"message,omitempty"`
URL string `json:"url,omitempty"`
DefaultPrompt string `json:"default_prompt,omitempty"`
TimedOut bool `json:"timed_out,omitempty"`
}
DialogResult describes how a JS dialog handler completed.
func HandleNextDialog ¶
func HandleNextDialog(page *rod.Page, accept bool, promptText string, timeout time.Duration) (*DialogResult, error)
HandleNextDialog waits for the next JavaScript dialog and handles it. The timeout is propagated via context so wait() unblocks cleanly on timeout and no goroutine is leaked.
type DiffNode ¶ added in v0.2.0
type DiffNode struct {
Ref string `json:"ref"`
Role string `json:"role"`
Name string `json:"name,omitempty"`
Href string `json:"href,omitempty"`
Value string `json:"value,omitempty"`
}
DiffNode is the minimal payload we return for added nodes.
type DiffStats ¶ added in v0.2.0
type DiffStats struct {
AddedCount int `json:"added"`
RemovedCount int `json:"removed"`
ChangedCount int `json:"changed"`
KeptCount int `json:"kept"`
}
DiffStats summarises a diff for agent consumption.
type ErrorCollector ¶
type ErrorCollector struct {
// contains filtered or unexported fields
}
ErrorCollector collects console-side errors from a page via CDP events.
func NewErrorCollector ¶
func NewErrorCollector(page *rod.Page) *ErrorCollector
NewErrorCollector creates a collector and starts listening on the page. It hooks into RuntimeConsoleAPICalled and RuntimeExceptionThrown. The caller must call Close() to detach the listeners.
func (*ErrorCollector) Close ¶ added in v0.7.0
func (c *ErrorCollector) Close()
Close detaches the error listeners.
func (*ErrorCollector) Errors ¶
func (c *ErrorCollector) Errors() []ErrorEntry
Errors returns all collected errors (snapshot).
type ErrorEntry ¶
type ErrorEntry struct {
Type string `json:"type"` // "console" or "network"
Level string `json:"level"` // "error", "warning", "4xx", "5xx"
Message string `json:"message"` // error message or URL
Source string `json:"source"` // file:line for console, URL for network
Status int `json:"status,omitempty"` // HTTP status for network errors
Method string `json:"method,omitempty"` // HTTP method for network
TimeMs int64 `json:"time_ms"` // timestamp relative to collector start
}
ErrorEntry represents a single console or network error.
type ExtractLevel ¶
type ExtractLevel string
ExtractLevel controls how much of the accessibility tree is returned.
const (
	LevelSkeleton ExtractLevel = "skeleton"
	LevelContent  ExtractLevel = "content"
	LevelFull     ExtractLevel = "full"
)
type ExtractedNode ¶
type ExtractedNode struct {
Ref string `json:"ref,omitempty"`
Role string `json:"role"`
Name string `json:"name,omitempty"`
Value string `json:"value,omitempty"`
Level int `json:"level,omitempty"`
Href string `json:"href,omitempty"`
Type string `json:"type,omitempty"`
Checked *bool `json:"checked,omitempty"`
Disabled bool `json:"disabled,omitempty"`
BackendNodeID proto.DOMBackendNodeID `json:"-"`
Children []ExtractedNode `json:"children,omitempty"`
}
ExtractedNode represents a filtered accessibility node.
type ExtractionResult ¶
type ExtractionResult struct {
Nodes []ExtractedNode `json:"nodes"`
Refs map[string]ExtractedNode `json:"refs"`
Stats ExtractionStats `json:"stats"`
}
ExtractionResult holds the extraction output.
func Extract ¶
func Extract(page *rod.Page, level ExtractLevel, selector string) (*ExtractionResult, error)
Extract retrieves the accessibility tree from the page and filters it.
type ExtractionStats ¶
type ExtractionStats struct {
TotalNodes int `json:"total_nodes"`
FilteredNodes int `json:"filtered_nodes"`
InteractiveCount int `json:"interactive_count"`
}
ExtractionStats provides extraction metrics.
type HAR ¶ added in v0.5.0
type HAR struct {
Log HARLog `json:"log"`
}
HAR follows the HAR 1.2 specification (W3C Web Performance Working Group). It's intentionally minimal: we only populate what NetworkEntry knows about, leaving the optional fields (headers, cookies, content text) empty so parsers treat them as "not captured" rather than "empty".
func BuildHAR ¶ added in v0.5.0
func BuildHAR(entries []NetworkEntry, pageURL, pageTitle, creatorVersion string) *HAR
BuildHAR constructs a HAR from the passive NetworkEntry slice collected by requestTracker. pageURL and pageTitle name the top-level page.
type HARContent ¶ added in v0.5.0
HARContent carries the payload summary (size, MIME).
type HARCreator ¶ added in v0.5.0
HARCreator identifies the tool that recorded the trace.
type HAREntry ¶ added in v0.5.0
type HAREntry struct {
PageRef string `json:"pageref,omitempty"`
StartedDateTime string `json:"startedDateTime"`
Time int64 `json:"time"`
Request HARRequest `json:"request"`
Response HARResponse `json:"response"`
Cache struct{} `json:"cache"`
Timings HARTimings `json:"timings"`
}
HAREntry is one network request + response.
type HARLog ¶ added in v0.5.0
type HARLog struct {
Version string `json:"version"`
Creator HARCreator `json:"creator"`
Pages []HARPage `json:"pages"`
Entries []HAREntry `json:"entries"`
}
HARLog is the top-level container.
type HARPage ¶ added in v0.5.0
type HARPage struct {
StartedDateTime string `json:"startedDateTime"`
ID string `json:"id"`
Title string `json:"title"`
}
HARPage groups entries by the top-level navigation that produced them.
type HARRequest ¶ added in v0.5.0
type HARRequest struct {
Method string `json:"method"`
URL string `json:"url"`
HTTPVersion string `json:"httpVersion"`
Headers []string `json:"headers"`
QueryString []string `json:"queryString"`
HeadersSize int `json:"headersSize"`
BodySize int `json:"bodySize"`
}
HARRequest describes the outgoing request.
type HARResponse ¶ added in v0.5.0
type HARResponse struct {
Status int `json:"status"`
StatusText string `json:"statusText"`
HTTPVersion string `json:"httpVersion"`
Headers []string `json:"headers"`
Cookies []string `json:"cookies"`
Content HARContent `json:"content"`
RedirectURL string `json:"redirectURL"`
HeadersSize int `json:"headersSize"`
BodySize int `json:"bodySize"`
}
HARResponse describes the incoming response.
type HARTimings ¶ added in v0.5.0
type HARTimings struct {
Send int `json:"send"`
Wait int64 `json:"wait"`
Receive int `json:"receive"`
}
HARTimings models per-phase durations. We only know the total so we put it on "wait".
type ImageDiffResult ¶ added in v0.6.0
type ImageDiffResult struct {
Width int `json:"width"`
Height int `json:"height"`
PixelsTotal int `json:"pixels_total"`
PixelsChanged int `json:"pixels_changed"`
DiffRatio float64 `json:"diff_ratio"`
DiffPath string `json:"diff_path,omitempty"`
Skipped bool `json:"skipped,omitempty"`
SkipReason string `json:"skip_reason,omitempty"`
}
ImageDiffResult summarises a pixel-by-pixel comparison.
func DiffImages ¶ added in v0.6.0
func DiffImages(baselinePNG, currentPNG []byte, tolerance int, diffPath string) (*ImageDiffResult, error)
DiffImages compares two images pixel by pixel. When dimensions differ, the result is flagged Skipped so the caller can decide (usually: fail or use the new image as the new baseline).
If diffPath is non-empty, a PNG is written there in which every pixel whose delta is >= tolerance is painted red; all other pixels keep their original colour.
type InterceptSession ¶ added in v0.5.0
type InterceptSession struct {
// contains filtered or unexported fields
}
InterceptSession owns the router lifetime. Stop() must be called to release resources.
func StartIntercept ¶ added in v0.5.0
func StartIntercept(browser *rod.Browser, spec InterceptSpec) (*InterceptSession, error)
StartIntercept enables Fetch interception on the browser and returns an InterceptSession. The caller is responsible for Stop().
func (*InterceptSession) Stats ¶ added in v0.5.0
func (s *InterceptSession) Stats() *InterceptStats
Stats returns the live counters.
func (*InterceptSession) Stop ¶ added in v0.5.0
func (s *InterceptSession) Stop() error
Stop disables interception and waits for the background goroutine.
type InterceptSpec ¶ added in v0.5.0
type InterceptSpec struct {
// BlockPatterns lists glob URL patterns to block with
// NetworkErrorReasonBlockedByClient.
BlockPatterns []string
// FulfillPattern optionally matches requests to be answered with the
// FulfillBody payload and FulfillStatus response code. Only set one pattern.
FulfillPattern string
FulfillBody []byte
FulfillStatus int
FulfillContentType string
}
InterceptSpec configures a request interception router.
type InterceptStats ¶ added in v0.5.0
type InterceptStats struct {
Blocked int
Fulfilled int
Passed int
// contains filtered or unexported fields
}
InterceptStats are cumulative counters updated by the router goroutine.
func (*InterceptStats) Snapshot ¶ added in v0.5.0
func (s *InterceptStats) Snapshot() (blocked, fulfilled, passed int)
Snapshot returns a concurrent-safe copy of the counters.
type LauncherOpts ¶ added in v0.2.0
type LauncherOpts struct {
Headless bool
RemotePort int // 0 = random
// Invisible forces headful Chrome (real rendering pipeline, harder to
// fingerprint as a bot) but positions the window far off-screen so it
// stays out of sight. Overrides Headless when set.
Invisible bool
// UserDataDir is the absolute Chrome --user-data-dir path. Empty means
// ephemeral. Use ResolveProfileDir to convert a short profile name into
// the canonical path under ~/.ghostchrome/profiles/<name>.
UserDataDir string
// Proxy is the upstream proxy URL passed to Chrome via --proxy-server.
// Examples: "http://user:pass@host:port", "socks5://host:1080".
// Empty means no proxy.
Proxy string
// Extensions is a list of absolute paths to unpacked Chrome extensions
// (each path must contain a manifest.json at its root). When non-empty,
// Chrome is launched with --load-extension and --disable-extensions-except
// so only the listed extensions are active. Note: requires HeadlessNew
// (the modern headless mode); old --headless ignores extensions.
Extensions []string
// SystemChrome forces the launcher to use the system's Google Chrome
// binary (com.google.Chrome bundle) instead of rod's bundled Chromium.
// Required when reusing a profile imported from the user's real Chrome
// — only com.google.Chrome can decrypt cookies sealed by macOS Keychain
// "Chrome Safe Storage". Auto-detected via the .ghostchrome-system-chrome
// marker file in the profile dir.
SystemChrome bool
}
LauncherOpts configures a stealth-flavored Chrome launcher.
type Locator ¶ added in v0.6.0
type Locator struct {
// Role matches the ARIA role (see engine.interactiveRoles, engine.skeletonRoles).
// Accepts canonical role strings or their one-letter agent abbreviation
// ("b"=button, "a"=link, "t"=textbox, etc.).
Role string
// Name matches the accessible name. Comparison is case-insensitive and uses
// substring matching, so "Sign in" matches "Sign in now".
Name string
// Label matches the accessible label derived from <label for=...> or
// aria-labelledby. In Chromium's a11y tree, that's exposed as the name of
// a textbox / combobox — so this is equivalent to Name for inputs.
// Included as a separate field for ergonomic CLI flags (--by-label).
Label string
// Text matches via page-wide text search — like Playwright's getByText.
// When set, Role is ignored.
Text string
}
Locator describes a semantic element match. At least one field must be set. When multiple fields are set, the match is conjunctive (all must hold), except that Role is ignored whenever Text is set.
type MultiCollectResult ¶ added in v0.2.0
type MultiCollectResult struct {
TotalItems int `json:"total_items"`
TotalTimeMs int64 `json:"total_time_ms"`
Sites []SiteResult `json:"sites"`
}
MultiCollectResult holds results from parallel multi-URL collection.
func MultiCollect ¶ added in v0.2.0
func MultiCollect(browser *rod.Browser, urls []string, limit int, stealth bool, maxParallel int) *MultiCollectResult
MultiCollect scrapes multiple URLs in parallel using separate browser tabs. Each URL gets its own tab, which navigates, collects, and then closes. maxParallel caps the number of concurrent tabs; a value <= 0 falls back to 5.
type NetworkEntry ¶
type NetworkEntry struct {
Method string `json:"method,omitempty"`
URL string `json:"url"`
Status int `json:"status"`
Size int `json:"size_bytes"`
TimeMs int64 `json:"time_ms"`
MimeType string `json:"mime_type,omitempty"`
Error string `json:"error,omitempty"`
}
NetworkEntry represents a captured network request.
type PageInfo ¶
type PageInfo struct {
URL string `json:"url"`
Title string `json:"title"`
Status int `json:"status"`
TimeMs int64 `json:"time_ms"`
}
PageInfo holds the result of a navigation.
type PageSnapshot ¶ added in v0.2.0
type PageSnapshot struct {
TargetID string `json:"target_id"`
URL string `json:"url,omitempty"`
Title string `json:"title,omitempty"`
Refs map[string]RefSnapshot `json:"refs,omitempty"`
}
PageSnapshot stores the last known interactive refs for a page target.
func BuildSnapshot ¶ added in v0.2.0
func BuildSnapshot(page *rod.Page, result *ExtractionResult) (*PageSnapshot, error)
BuildSnapshot creates an in-memory ref snapshot from an extraction result.
type PreviewResult ¶
type PreviewResult struct {
PageInfo *PageInfo `json:"page"`
Errors []ErrorEntry `json:"errors"`
Network []NetworkEntry `json:"network"`
DOM *ExtractionResult `json:"dom"`
Summary PreviewSummary `json:"summary"`
}
PreviewResult is the all-in-one dev report for a page.
type PreviewSummary ¶
type PreviewSummary struct {
TotalRequests int `json:"total_requests"`
FailedRequests int `json:"failed_requests"`
ErrorCount int `json:"error_count"`
WarningCount int `json:"warning_count"`
InteractiveCount int `json:"interactive_count"`
}
PreviewSummary provides quick stats.
type RefSnapshot ¶ added in v0.2.0
type RefSnapshot struct {
BackendNodeID proto.DOMBackendNodeID `json:"backend_node_id"`
Role string `json:"role,omitempty"`
Name string `json:"name,omitempty"`
}
RefSnapshot stores a stable backend node mapping for a single ref.
type RenderProfile ¶ added in v0.2.0
type RenderProfile struct {
// Agent is true when the output is being consumed by an LLM agent runner
// (Claude Code, Cursor, Aider, etc.) rather than a human terminal.
Agent bool
// Format is "text" or "json".
Format string
// MaxLabelLen truncates node names / values to this length in agent mode.
// 0 means no truncation.
MaxLabelLen int
// AbbrevRoles uses 1-2 character role abbreviations (b/a/t/c/s/r/m/x/h).
AbbrevRoles bool
// DropEmptyStats omits "[errors] 0 ..." / "[network] ... 0 failed" headers
// when counts are zero.
DropEmptyStats bool
}
RenderProfile controls how output is rendered for the calling environment. It is resolved once per CLI invocation (see ResolveProfile) and then threaded into formatters.
func ProfileAgent ¶ added in v0.2.0
func ProfileAgent(format string) RenderProfile
ProfileAgent returns the compact agent-optimised profile.
func ProfileHuman ¶ added in v0.2.0
func ProfileHuman(format string) RenderProfile
ProfileHuman returns the default human-friendly profile.
func ResolveProfile ¶ added in v0.2.0
func ResolveProfile(explicit, format string) RenderProfile
ResolveProfile picks a RenderProfile from an explicit flag ("auto", "human", "agent") with environment-variable fallback for "auto".
type SiteResult ¶ added in v0.2.0
type SiteResult struct {
URL string `json:"url"`
Items []CollectedItem `json:"items"`
Count int `json:"count"`
TimeMs int64 `json:"time_ms"`
Error string `json:"error,omitempty"`
}
SiteResult holds the collect result for a single URL in a multi-collect.
type SnapshotDiff ¶ added in v0.2.0
type SnapshotDiff struct {
Unchanged bool `json:"unchanged,omitempty"`
Added []DiffNode `json:"added,omitempty"`
Removed []string `json:"removed,omitempty"`
Changed map[string]DiffEntry `json:"changed,omitempty"`
Stats DiffStats `json:"stats"`
}
SnapshotDiff reports the changes between two ref maps of a page. All fields are optional in JSON output so an unchanged page serialises to `{"unchanged":true}`.
func DiffRefs ¶ added in v0.2.0
func DiffRefs(prev, curr map[string]RefSnapshot) SnapshotDiff
DiffRefs compares two ref maps (typically the persisted PageSnapshot.Refs). Refs are reassigned in document order by the extractor, so a key match indicates the same logical node slot. A role or name change on the same key counts as "changed"; keys that disappear count as "removed" and keys that newly appear count as "added".
type StorageCookie ¶ added in v0.3.0
type StorageCookie struct {
Name string `json:"name"`
Value string `json:"value"`
Domain string `json:"domain"`
Path string `json:"path"`
Expires float64 `json:"expires,omitempty"`
HTTPOnly bool `json:"httpOnly,omitempty"`
Secure bool `json:"secure,omitempty"`
SameSite string `json:"sameSite,omitempty"`
}
StorageCookie uses the Playwright-compatible field names (sameSite as string, expires as float64 seconds-since-epoch).
type StorageKeyValue ¶ added in v0.3.0
StorageKeyValue is a single localStorage entry.
type StorageOrigin ¶ added in v0.3.0
type StorageOrigin struct {
Origin string `json:"origin"`
LocalStorage []StorageKeyValue `json:"localStorage"`
}
StorageOrigin groups localStorage entries by origin. sessionStorage is NOT persisted: it's per-tab and generally not replayable.
type StorageState ¶ added in v0.3.0
type StorageState struct {
Cookies []StorageCookie `json:"cookies"`
Origins []StorageOrigin `json:"origins"`
}
StorageState mirrors the Playwright storageState JSON shape so a state file produced by ghostchrome can be loaded by Playwright (and vice versa, within reasonable limits).
func SaveStorageState ¶ added in v0.3.0
SaveStorageState captures cookies (browser-wide) and localStorage for the origin of the current page. Callers can concatenate multiple SaveStorageState runs if they need multiple origins.
Source Files
¶
- browser.go
- capture.go
- collector.go
- cookies.go
- cookies_decrypt.go
- cookies_inject.go
- emulate.go
- errors.go
- extractor.go
- extractor_domfallback.go
- format.go
- har.go
- human.go
- imagediff.go
- interactor.go
- intercept.go
- locator.go
- navigator.go
- network_tracker.go
- preview.go
- profile.go
- proxy_auth.go
- refindex.go
- session_state.go
- snapshot_diff.go
- stealth.go
- storage.go
- wait.go