browser

package

v1.3.0 Latest Latest Go to latest Published: Feb 26, 2026 License: MIT Imports: 10 Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/BaSui01/agentflow

Links

Open Source Insights

Documentation ¶

Index ¶

func ScreenshotToBase64(s *Screenshot) string
type Action
type ActionRecord
type AgenticAction
type AgenticBrowser
- func NewAgenticBrowser(driver BrowserDriver, vision VisionModel, config AgenticBrowserConfig, ...) *AgenticBrowser
- func (b *AgenticBrowser) ExecuteTask(ctx context.Context, task BrowserTask) (*TaskResult, error)
- func (b *AgenticBrowser) GetHistory() []ActionRecord
type AgenticBrowserConfig
- func DefaultAgenticBrowserConfig() AgenticBrowserConfig
type BoundingBox
type Browser
type BrowserCommand
type BrowserConfig
- func DefaultBrowserConfig() BrowserConfig
type BrowserDriver
type BrowserFactory
type BrowserPool
- func NewBrowserPool(config BrowserPoolConfig, logger *zap.Logger) (*BrowserPool, error)
- func (p *BrowserPool) Acquire(ctx context.Context) (*ChromeDPBrowser, error)
- func (p *BrowserPool) Close() error
- func (p *BrowserPool) Release(browser *ChromeDPBrowser)
- func (p *BrowserPool) Stats() (idle, active, total int)
type BrowserPoolConfig
- func DefaultBrowserPoolConfig() BrowserPoolConfig
type BrowserResult
type BrowserSession
- func NewBrowserSession(id string, browser Browser, config BrowserConfig, logger *zap.Logger) *BrowserSession
- func (s *BrowserSession) Click(ctx context.Context, selector string) (*BrowserResult, error)
- func (s *BrowserSession) Close() error
- func (s *BrowserSession) Execute(ctx context.Context, cmd BrowserCommand) (*BrowserResult, error)
- func (s *BrowserSession) Extract(ctx context.Context, selector string) (*BrowserResult, error)
- func (s *BrowserSession) GetHistory() []BrowserCommand
- func (s *BrowserSession) Navigate(ctx context.Context, url string) (*BrowserResult, error)
- func (s *BrowserSession) Screenshot(ctx context.Context) (*BrowserResult, error)
- func (s *BrowserSession) Type(ctx context.Context, selector, text string) (*BrowserResult, error)
- func (s *BrowserSession) Wait(ctx context.Context, selector string, timeout time.Duration) (*BrowserResult, error)
type BrowserTask
type BrowserTool
- func NewBrowserTool(factory BrowserFactory, config BrowserConfig, logger *zap.Logger) *BrowserTool
- func (t *BrowserTool) CloseAll() error
- func (t *BrowserTool) CloseSession(sessionID string) error
- func (t *BrowserTool) ExecuteCommand(ctx context.Context, sessionID string, cmd BrowserCommand) (*BrowserResult, error)
- func (t *BrowserTool) GetOrCreateSession(sessionID string) (*BrowserSession, error)
type ChromeDPBrowser
- func NewChromeDPBrowser(config BrowserConfig, logger *zap.Logger) (*ChromeDPBrowser, error)
- func (b *ChromeDPBrowser) Close() error
- func (b *ChromeDPBrowser) Execute(ctx context.Context, cmd BrowserCommand) (*BrowserResult, error)
- func (b *ChromeDPBrowser) GetState(ctx context.Context) (*PageState, error)
type ChromeDPBrowserFactory
- func NewChromeDPBrowserFactory(logger *zap.Logger) *ChromeDPBrowserFactory
- func (f *ChromeDPBrowserFactory) Create(config BrowserConfig) (Browser, error)
type ChromeDPDriver
- func NewChromeDPDriver(config BrowserConfig, logger *zap.Logger) (*ChromeDPDriver, error)
- func (d *ChromeDPDriver) Click(ctx context.Context, x, y int) error
- func (d *ChromeDPDriver) Close() error
- func (d *ChromeDPDriver) GetURL(ctx context.Context) (string, error)
- func (d *ChromeDPDriver) Navigate(ctx context.Context, url string) error
- func (d *ChromeDPDriver) Screenshot(ctx context.Context) (*Screenshot, error)
- func (d *ChromeDPDriver) Scroll(ctx context.Context, deltaX, deltaY int) error
- func (d *ChromeDPDriver) Type(ctx context.Context, text string) error
type ChromeDPDriverOption
type Element
type LLMVisionAdapter
- func NewLLMVisionAdapter(provider LLMVisionProvider, logger *zap.Logger) *LLMVisionAdapter
- func (a *LLMVisionAdapter) Analyze(ctx context.Context, screenshot *Screenshot) (*VisionAnalysis, error)
- func (a *LLMVisionAdapter) PlanActions(ctx context.Context, goal string, analysis *VisionAnalysis) ([]AgenticAction, error)
type LLMVisionProvider
type PageElement
type PageState
type Screenshot
type TaskResult
type VisionAnalysis
type VisionModel

Constants ¶

This section is empty.

Variables ¶

This section is empty.

Functions ¶

func ScreenshotToBase64 ¶

func ScreenshotToBase64(s *Screenshot) string

ScreenshotToBase64 将截图转换为 Base64 编码的字符串。

Types ¶

type Action ¶

type Action string

Action 表示浏览器动作类型。

const (
	ActionNavigate   Action = "navigate"
	ActionClick      Action = "click"
	ActionType       Action = "type"
	ActionScroll     Action = "scroll"
	ActionScreenshot Action = "screenshot"
	ActionExtract    Action = "extract"
	ActionWait       Action = "wait"
	ActionSelect     Action = "select"
	ActionHover      Action = "hover"
	ActionBack       Action = "back"
	ActionForward    Action = "forward"
	ActionRefresh    Action = "refresh"
)

type ActionRecord ¶

type ActionRecord struct {
	Action     AgenticAction `json:"action"`
	Screenshot *Screenshot   `json:"screenshot,omitempty"`
	Success    bool          `json:"success"`
	Error      string        `json:"error,omitempty"`
	Timestamp  time.Time     `json:"timestamp"`
}

ActionRecord 记录一次已执行的动作。

type AgenticAction ¶

type AgenticAction struct {
	Type     Action         `json:"type"`
	Selector string         `json:"selector,omitempty"`
	Value    string         `json:"value,omitempty"`
	X        int            `json:"x,omitempty"`
	Y        int            `json:"y,omitempty"`
	Duration time.Duration  `json:"duration,omitempty"`
	Metadata map[string]any `json:"metadata,omitempty"`
}

AgenticAction 表示 AgenticBrowser 使用的浏览器动作。

type AgenticBrowser ¶

type AgenticBrowser struct {
	// contains filtered or unexported fields
}

AgenticBrowser 提供基于 Vision-Action Loop（视觉-动作循环）的浏览器自动化。

func NewAgenticBrowser ¶

func NewAgenticBrowser(driver BrowserDriver, vision VisionModel, config AgenticBrowserConfig, logger *zap.Logger) *AgenticBrowser

NewAgenticBrowser 创建一个新的 AgenticBrowser。

func (*AgenticBrowser) ExecuteTask ¶

func (b *AgenticBrowser) ExecuteTask(ctx context.Context, task BrowserTask) (*TaskResult, error)

ExecuteTask 使用 Vision-Action Loop 执行任务。

func (*AgenticBrowser) GetHistory ¶

func (b *AgenticBrowser) GetHistory() []ActionRecord

GetHistory 返回动作历史。

type AgenticBrowserConfig ¶

type AgenticBrowserConfig struct {
	MaxActions      int           `json:"max_actions"`
	ActionDelay     time.Duration `json:"action_delay"`
	ScreenshotDelay time.Duration `json:"screenshot_delay"`
	Timeout         time.Duration `json:"timeout"`
	RetryOnFailure  bool          `json:"retry_on_failure"`
	MaxRetries      int           `json:"max_retries"`
}

AgenticBrowserConfig 是 AgenticBrowser 的配置。

func DefaultAgenticBrowserConfig ¶

func DefaultAgenticBrowserConfig() AgenticBrowserConfig

DefaultAgenticBrowserConfig 返回默认配置。

type BoundingBox ¶

type BoundingBox struct {
	X      float64 `json:"x"`
	Y      float64 `json:"y"`
	Width  float64 `json:"width"`
	Height float64 `json:"height"`
}

BoundingBox 表示元素的位置和大小。

type Browser ¶

type Browser interface {
	// Execute 执行浏览器命令。
	Execute(ctx context.Context, cmd BrowserCommand) (*BrowserResult, error)
	// GetState 返回当前页面状态 。
	GetState(ctx context.Context) (*PageState, error)
	// Close 关闭浏览器。
	Close() error
}

Browser 定义了浏览器自动化的接口。

type BrowserCommand ¶

type BrowserCommand struct {
	Action   Action            `json:"action"`
	Selector string            `json:"selector,omitempty"` // CSS selector or XPath
	Value    string            `json:"value,omitempty"`    // For type, navigate actions
	Options  map[string]string `json:"options,omitempty"`
}

BrowserCommand 表示要在浏览器中执行的命令。

type BrowserConfig ¶

type BrowserConfig struct {
	Headless          bool          `json:"headless"`
	Timeout           time.Duration `json:"timeout"`
	ViewportWidth     int           `json:"viewport_width"`
	ViewportHeight    int           `json:"viewport_height"`
	UserAgent         string        `json:"user_agent,omitempty"`
	ProxyURL          string        `json:"proxy_url,omitempty"`
	ScreenshotOnError bool          `json:"screenshot_on_error"`
}

BrowserConfig 是浏览器自动化的配置。

func DefaultBrowserConfig ¶

func DefaultBrowserConfig() BrowserConfig

DefaultBrowserConfig 返回合理的默认配置。

type BrowserDriver ¶

type BrowserDriver interface {
	Navigate(ctx context.Context, url string) error
	Screenshot(ctx context.Context) (*Screenshot, error)
	Click(ctx context.Context, x, y int) error
	Type(ctx context.Context, text string) error
	Scroll(ctx context.Context, deltaX, deltaY int) error
	GetURL(ctx context.Context) (string, error)
	Close() error
}

BrowserDriver 是用于控制浏览器的接口。

type BrowserFactory ¶

type BrowserFactory interface {
	Create(config BrowserConfig) (Browser, error)
}

BrowserFactory 创建浏览器实例。

type BrowserPool ¶

type BrowserPool struct {
	// contains filtered or unexported fields
}

BrowserPool 浏览器实例池

func NewBrowserPool ¶

func NewBrowserPool(config BrowserPoolConfig, logger *zap.Logger) (*BrowserPool, error)

NewBrowserPool 创建浏览器池

func (*BrowserPool) Acquire ¶

func (p *BrowserPool) Acquire(ctx context.Context) (*ChromeDPBrowser, error)

Acquire 获取一个浏览器实例

func (*BrowserPool) Close ¶

func (p *BrowserPool) Close() error

Close 关闭浏览器池

func (*BrowserPool) Release ¶

func (p *BrowserPool) Release(browser *ChromeDPBrowser)

Release 归还浏览器实例

func (*BrowserPool) Stats ¶

func (p *BrowserPool) Stats() (idle, active, total int)

Stats 返回池统计信息

type BrowserPoolConfig ¶

type BrowserPoolConfig struct {
	MaxSize       int           `json:"max_size"`
	MinIdle       int           `json:"min_idle"`
	MaxIdleTime   time.Duration `json:"max_idle_time"`
	BrowserConfig BrowserConfig `json:"browser_config"`
}

BrowserPoolConfig 浏览器池配置

func DefaultBrowserPoolConfig ¶

func DefaultBrowserPoolConfig() BrowserPoolConfig

DefaultBrowserPoolConfig 默认池配置

type BrowserResult ¶

type BrowserResult struct {
	Success    bool            `json:"success"`
	Action     Action          `json:"action"`
	Data       json.RawMessage `json:"data,omitempty"`
	Screenshot []byte          `json:"screenshot,omitempty"`
	Error      string          `json:"error,omitempty"`
	Duration   time.Duration   `json:"duration"`
	URL        string          `json:"url,omitempty"`
	Title      string          `json:"title,omitempty"`
}

BrowserResult 表示浏览器命令的执行结果。

type BrowserSession ¶

type BrowserSession struct {
	// contains filtered or unexported fields
}

BrowserSession 管理一个浏览器自动化会话。

func NewBrowserSession ¶

func NewBrowserSession(id string, browser Browser, config BrowserConfig, logger *zap.Logger) *BrowserSession

NewBrowserSession 创建一个新的浏览器会话。

func (*BrowserSession) Click ¶

func (s *BrowserSession) Click(ctx context.Context, selector string) (*BrowserResult, error)

点击元素。

func (*BrowserSession) Close ¶

func (s *BrowserSession) Close() error

关闭会话。

func (*BrowserSession) Execute ¶

func (s *BrowserSession) Execute(ctx context.Context, cmd BrowserCommand) (*BrowserResult, error)

Execute 执行命令并将其记录到历史中。

func (*BrowserSession) Extract ¶

func (s *BrowserSession) Extract(ctx context.Context, selector string) (*BrowserResult, error)

提取页面中的内容。

func (*BrowserSession) GetHistory ¶

func (s *BrowserSession) GetHistory() []BrowserCommand

GetHistory 返回命令历史。

func (*BrowserSession) Navigate ¶

func (s *BrowserSession) Navigate(ctx context.Context, url string) (*BrowserResult, error)

Navigate 导航到指定的 URL。

func (*BrowserSession) Screenshot ¶

func (s *BrowserSession) Screenshot(ctx context.Context) (*BrowserResult, error)

Screenshot 截取当前页面的截图。

func (*BrowserSession) Type ¶

func (s *BrowserSession) Type(ctx context.Context, selector, text string) (*BrowserResult, error)

Type 向指定元素输入文本。

func (*BrowserSession) Wait ¶

func (s *BrowserSession) Wait(ctx context.Context, selector string, timeout time.Duration) (*BrowserResult, error)

等待元素出现。

type BrowserTask ¶

type BrowserTask struct {
	ID           string         `json:"id"`
	Goal         string         `json:"goal"`
	StartURL     string         `json:"start_url,omitempty"`
	Instructions []string       `json:"instructions,omitempty"`
	Metadata     map[string]any `json:"metadata,omitempty"`
}

BrowserTask 表示一个浏览器自动化任务。

type BrowserTool ¶

type BrowserTool struct {
	// contains filtered or unexported fields
}

BrowserTool 将浏览器自动化封装为 Agent 工具。

func NewBrowserTool ¶

func NewBrowserTool(factory BrowserFactory, config BrowserConfig, logger *zap.Logger) *BrowserTool

NewBrowserTool 创建一个新的浏览器工具。

func (*BrowserTool) CloseAll ¶

func (t *BrowserTool) CloseAll() error

关闭全部会话。

func (*BrowserTool) CloseSession ¶

func (t *BrowserTool) CloseSession(sessionID string) error

CloseSession 关闭指定的会话。

func (*BrowserTool) ExecuteCommand ¶

func (t *BrowserTool) ExecuteCommand(ctx context.Context, sessionID string, cmd BrowserCommand) (*BrowserResult, error)

ExecuteCommand 在指定会话中执行浏览器命令。

func (*BrowserTool) GetOrCreateSession ¶

func (t *BrowserTool) GetOrCreateSession(sessionID string) (*BrowserSession, error)

GetOrCreateSession 获取或创建浏览器会话。

type ChromeDPBrowser ¶

type ChromeDPBrowser struct {
	// contains filtered or unexported fields
}

ChromeDPBrowser 实现 Browser 接口

func NewChromeDPBrowser ¶

func NewChromeDPBrowser(config BrowserConfig, logger *zap.Logger) (*ChromeDPBrowser, error)

NewChromeDPBrowser 创建 ChromeDPBrowser

func (*ChromeDPBrowser) Close ¶

func (b *ChromeDPBrowser) Close() error

Close 关闭浏览器

func (*ChromeDPBrowser) Execute ¶

func (b *ChromeDPBrowser) Execute(ctx context.Context, cmd BrowserCommand) (*BrowserResult, error)

Execute 执行浏览器命令

func (*ChromeDPBrowser) GetState ¶

func (b *ChromeDPBrowser) GetState(ctx context.Context) (*PageState, error)

GetState 获取页面状态

type ChromeDPBrowserFactory ¶

type ChromeDPBrowserFactory struct {
	// contains filtered or unexported fields
}

ChromeDPBrowserFactory 实现 BrowserFactory 接口

func NewChromeDPBrowserFactory ¶

func NewChromeDPBrowserFactory(logger *zap.Logger) *ChromeDPBrowserFactory

NewChromeDPBrowserFactory 创建工厂

func (*ChromeDPBrowserFactory) Create ¶

func (f *ChromeDPBrowserFactory) Create(config BrowserConfig) (Browser, error)

Create 创建浏览器实例

type ChromeDPDriver ¶

type ChromeDPDriver struct {
	// contains filtered or unexported fields
}

ChromeDPDriver 基于 chromedp 的 BrowserDriver 实现

func NewChromeDPDriver ¶

func NewChromeDPDriver(config BrowserConfig, logger *zap.Logger) (*ChromeDPDriver, error)

NewChromeDPDriver 创建 chromedp 驱动

func (*ChromeDPDriver) Click ¶

func (d *ChromeDPDriver) Click(ctx context.Context, x, y int) error

Click 点击指定坐标

func (*ChromeDPDriver) Close ¶

func (d *ChromeDPDriver) Close() error

Close 关闭浏览器

func (*ChromeDPDriver) GetURL ¶

func (d *ChromeDPDriver) GetURL(ctx context.Context) (string, error)

GetURL 获取当前 URL

func (*ChromeDPDriver) Navigate ¶

func (d *ChromeDPDriver) Navigate(ctx context.Context, url string) error

Navigate 导航到 URL

func (*ChromeDPDriver) Screenshot ¶

func (d *ChromeDPDriver) Screenshot(ctx context.Context) (*Screenshot, error)

Screenshot 截取页面截图

func (*ChromeDPDriver) Scroll ¶

func (d *ChromeDPDriver) Scroll(ctx context.Context, deltaX, deltaY int) error

Scroll 滚动页面

func (*ChromeDPDriver) Type ¶

func (d *ChromeDPDriver) Type(ctx context.Context, text string) error

Type 输入文本

type ChromeDPDriverOption ¶

type ChromeDPDriverOption func(*ChromeDPDriver)

ChromeDPDriverOption 配置选项

type Element ¶

type Element struct {
	ID         string  `json:"id"`
	Type       string  `json:"type"` // button, input, link, text, image
	Text       string  `json:"text,omitempty"`
	X          int     `json:"x"`
	Y          int     `json:"y"`
	Width      int     `json:"width"`
	Height     int     `json:"height"`
	Clickable  bool    `json:"clickable"`
	Confidence float64 `json:"confidence"`
}

Element 表示检测到的 UI 元素。

type LLMVisionAdapter ¶

type LLMVisionAdapter struct {
	// contains filtered or unexported fields
}

LLMVisionAdapter 将 LLM 视觉能力适配为 VisionModel 接口

func NewLLMVisionAdapter ¶

func NewLLMVisionAdapter(provider LLMVisionProvider, logger *zap.Logger) *LLMVisionAdapter

NewLLMVisionAdapter 创建视觉适配器

func (*LLMVisionAdapter) Analyze ¶

func (a *LLMVisionAdapter) Analyze(ctx context.Context, screenshot *Screenshot) (*VisionAnalysis, error)

Analyze 分析截图

func (*LLMVisionAdapter) PlanActions ¶

func (a *LLMVisionAdapter) PlanActions(ctx context.Context, goal string, analysis *VisionAnalysis) ([]AgenticAction, error)

PlanActions 规划下一步操作

type LLMVisionProvider ¶

type LLMVisionProvider interface {
	// AnalyzeImage 分析图片，返回 JSON 格式的分析结果
	AnalyzeImage(ctx context.Context, imageBase64 string, prompt string) (string, error)
}

LLMVisionProvider LLM 视觉能力提供者接口

type PageElement ¶

type PageElement struct {
	ID          string            `json:"id,omitempty"`
	Tag         string            `json:"tag"`
	Text        string            `json:"text,omitempty"`
	Selector    string            `json:"selector"`
	Type        string            `json:"type,omitempty"` // button, input, link, etc.
	Visible     bool              `json:"visible"`
	Attrs       map[string]string `json:"attrs,omitempty"`
	BoundingBox *BoundingBox      `json:"bounding_box,omitempty"`
}

PageElement 表示页面上的可交互元素。

type PageState ¶

type PageState struct {
	URL          string            `json:"url"`
	Title        string            `json:"title"`
	Content      string            `json:"content,omitempty"`  // Simplified DOM or text content
	Elements     []PageElement     `json:"elements,omitempty"` // Interactive elements
	Screenshot   []byte            `json:"screenshot,omitempty"`
	Cookies      map[string]string `json:"cookies,omitempty"`
	LocalStorage map[string]string `json:"local_storage,omitempty"`
}

PageState 表示浏览器页面的当前状态。

type Screenshot ¶

type Screenshot struct {
	Data      []byte    `json:"data"`
	Width     int       `json:"width"`
	Height    int       `json:"height"`
	Timestamp time.Time `json:"timestamp"`
	URL       string    `json:"url"`
}

Screenshot 表示一张浏览器截图。

type TaskResult ¶

type TaskResult struct {
	TaskID    string         `json:"task_id"`
	Success   bool           `json:"success"`
	Actions   []ActionRecord `json:"actions"`
	StartTime time.Time      `json:"start_time"`
	EndTime   time.Time      `json:"end_time"`
	Duration  time.Duration  `json:"duration"`
	Error     string         `json:"error,omitempty"`
}

TaskResult 表示浏览器任务的结果。

type VisionAnalysis ¶

type VisionAnalysis struct {
	Elements    []Element `json:"elements"`
	PageTitle   string    `json:"page_title"`
	PageType    string    `json:"page_type"`
	Description string    `json:"description"`
	Suggestions []string  `json:"suggestions,omitempty"`
}

VisionAnalysis 是视觉模型分析的结果。

type VisionModel ¶

type VisionModel interface {
	Analyze(ctx context.Context, screenshot *Screenshot) (*VisionAnalysis, error)
	PlanActions(ctx context.Context, goal string, analysis *VisionAnalysis) ([]AgenticAction, error)
}

VisionModel 是视觉模型接口，用于分析截图并规划后续动作。

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL