agent

package

v0.1.2 Latest Latest Go to latest Published: Jun 10, 2025 License: MIT Imports: 26 Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/nerdface-ai/browser-use-go

Links

Open Source Insights

Documentation ¶

Index ¶

Variables
func GetInteractedElement(modelOutput *AgentOutput, selectorMap *dom.SelectorMap) []*dom.DOMHistoryElement
func ToolInfoWithCustomActions(customActions *controller.ActionModel) *schema.ToolInfo
func ValidationOutputSchema() *openapi3.Schema
type AIMessageArguments
type ActionResult
type Agent
- func NewAgent(task string, llm model.ToolCallingChatModel, options ...AgentOption) *Agent
- func (ag *Agent) Close()
- func (ag *Agent) Run(opts ...AgentRunOption) (*AgentHistoryList, error)
type AgentBrain
type AgentHistory
- func (ah *AgentHistory) ModelDump() map[string]interface{}
type AgentHistoryList
- func (ahl *AgentHistoryList) IsDone() bool
- func (ahl *AgentHistoryList) IsSuccessful() *bool
- func (ahl *AgentHistoryList) LastResult() *ActionResult
- func (ahl *AgentHistoryList) ModelDump() map[string]interface{}
- func (ahl *AgentHistoryList) TotalInputTokens() int
type AgentMessagePrompt
- func NewAgentMessagePrompt(state *browser.BrowserState, result []*controller.ActionResult, ...) *AgentMessagePrompt
- func (amp *AgentMessagePrompt) GetUserMessage(useVision bool) *schema.Message
type AgentOption
- func WithAgentSettings(settings AgentSettingsConfig) AgentOption
- func WithBrowser(b *browser.Browser) AgentOption
- func WithBrowserConfig(b browser.BrowserConfig) AgentOption
- func WithBrowserContext(b *browser.BrowserContext) AgentOption
- func WithController(c *controller.Controller) AgentOption
- func WithInitialActions(actions []map[string]interface{}) AgentOption
- func WithInjectedAgentState(state *AgentState) AgentOption
- func WithRegisterDoneCallback(callback func(history *AgentHistoryList)) AgentOption
- func WithRegisterExternalAgentStatusRaiseErrorCallback(callback func() bool) AgentOption
- func WithRegisterNewStepCallback(callback func(state *browser.BrowserState, output *AgentOutput, n int)) AgentOption
- func WithSensitiveData(data map[string]string) AgentOption
- func WithValidateLLM(validateLLM model.BaseChatModel) AgentOption
type AgentOptions
type AgentOutput
- func (ao *AgentOutput) ToString() string
type AgentRunOption
- func WithAutoClose(autoClose bool) AgentRunOption
- func WithMaxSteps(n int) AgentRunOption
- func WithOnStepEnd(cb func(*Agent)) AgentRunOption
- func WithOnStepStart(cb func(*Agent)) AgentRunOption
type AgentSettings
- func NewAgentSettings(config AgentSettingsConfig) *AgentSettings
type AgentSettingsConfig
type AgentState
- func NewAgentState() *AgentState
type AgentStepInfo
- func (asi *AgentStepInfo) IsLastStep() bool
type BrowserStateHistory
type CurrentState
type ManagedMessage
type MessageHistory
- func (m *MessageHistory) AddMessage(message *schema.Message, metadata *MessageMetadata, position *int)
- func (m *MessageHistory) AddModelOutput(output *AgentOutput)
- func (m *MessageHistory) GetMessages() []*schema.Message
- func (m *MessageHistory) GetTotalTokens() int
- func (m *MessageHistory) RemoveLastStateMessage()
- func (m *MessageHistory) RemoveOldestMessage()
type MessageManager
- func NewMessageManager(task string, systemPrompt *schema.Message, settings *MessageManagerSettings, ...) *MessageManager
- func (m *MessageManager) AddMessageWithTokens(message *schema.Message, position *int, messageType *string)
- func (m *MessageManager) AddModelOutput(output *AgentOutput)
- func (m *MessageManager) AddNewTask(newTask string)
- func (m *MessageManager) AddPlan(plan *string, position *int) error
- func (m *MessageManager) AddStateMessage(state *browser.BrowserState, result []*controller.ActionResult, ...)
- func (m *MessageManager) CutMessages() error
- func (m *MessageManager) GetMessages() []*schema.Message
- func (m *MessageManager) RemoveLastStateMessage() error
- func (m *MessageManager) SaveConversation(inputMessages []*schema.Message, modelOutput *AgentOutput, target string) error
type MessageManagerConfig
type MessageManagerSettings
- func NewMessageManagerSettings(config MessageManagerConfig) *MessageManagerSettings
type MessageManagerState
- func NewMessageManagerState() *MessageManagerState
type MessageMetadata
type StepMetadata
- func (sm *StepMetadata) DurationSeconds() float64
type SystemPrompt
- func NewSystemPrompt(actionDescription string, maxActionsPerStep int, overrideSystemMessage *string, ...) *SystemPrompt
type ToolCallingMethod

Constants ¶

This section is empty.

Variables ¶

View Source

var REQUIRED_LLM_API_ENV_VARS = map[string][]string{"ChatOpenAI": {"OPENAI_API_KEY"}, "AzureOpenAI": {"AZURE_ENDPOINT", "AZURE_OPENAI_API_KEY"}, "ChatBedrockConverse": {"ANTHROPIC_API_KEY"}, "ChatAnthropic": {"ANTHROPIC_API_KEY"}, "ChatGoogleGenerativeAI": {"GEMINI_API_KEY"}, "ChatDeepSeek": {"DEEPSEEK_API_KEY"}, "ChatOllama": {}, "ChatGrok": {"GROK_API_KEY"}}

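REQUIRED_LLM_API_ENV_VARS is declared as a map[string][]string: each key is a chat model name and each value lists the API environment variables required for that model.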

Functions ¶

func GetInteractedElement ¶

func GetInteractedElement(modelOutput *AgentOutput, selectorMap *dom.SelectorMap) []*dom.DOMHistoryElement

func ToolInfoWithCustomActions ¶

func ToolInfoWithCustomActions(customActions *controller.ActionModel) *schema.ToolInfo

func ValidationOutputSchema ¶ added in v0.1.2

func ValidationOutputSchema() *openapi3.Schema

Types ¶

type AIMessageArguments ¶

type AIMessageArguments struct {
	CurrentState CurrentState             `json:"current_state"`
	Actions      []map[string]interface{} `json:"actions"`
}

type ActionResult ¶

type ActionResult = controller.ActionResult

type Agent ¶

type Agent struct {
	Task                   string
	LLM                    model.ToolCallingChatModel
	Controller             *controller.Controller
	SensitiveData          map[string]string
	Settings               *AgentSettings
	State                  *AgentState
	InjectedBrowser        bool
	InjectedBrowserContext bool
	Browser                *browser.Browser
	BrowserContext         *browser.BrowserContext

	// model
	ValidateLLM             model.BaseChatModel
	ChatModelLibrary        string
	ModelName               string // e.g., openai, googleai, anthropic, huggingface
	PlannerModelName        string
	PageExtractionModelName string

	RegisterNewStepCallback                       func(state *browser.BrowserState, output *AgentOutput, n int)
	RegisterDoneCallback                          func(history *AgentHistoryList)
	RegisterExternalAgentStatusRaiseErrorCallback func() bool

	ToolCallingMethod *ToolCallingMethod `json:"tool_calling_method,omitempty"`

	ActionModel     *controller.ActionModel
	AgentOutput     *schema.ToolInfo
	DoneActionModel *controller.ActionModel
	DoneAgentOutput *schema.ToolInfo

	MessageManager *MessageManager

	UnfilteredActions string
	InitialActions    []*controller.ActModel
}

func NewAgent ¶

func NewAgent(
	task string,
	llm model.ToolCallingChatModel,
	options ...AgentOption,

) *Agent

To provide custom configuration, pass agent settings to the NewAgent function (the WithAgentSettings option accepts an AgentSettingsConfig). The settings are built from an AgentSettingsConfig, e.g.,

NewAgentSettings(AgentSettingsConfig{
	"use_vision": true,
	"use_vision_for_planner": true,
	"save_conversation_path": "./conversation.json",
	...
})

func (*Agent) Close ¶

func (ag *Agent) Close()

Close all resources

func (*Agent) Run ¶

func (ag *Agent) Run(opts ...AgentRunOption) (*AgentHistoryList, error)

Run executes the agent for up to maxSteps (default 10), using functional options for callbacks

func (*AgentHistory) ModelDump ¶

func (ah *AgentHistory) ModelDump() map[string]interface{}

type AgentHistoryList ¶

type AgentHistoryList struct {
	History []*AgentHistory `json:"history"`
}

func (*AgentHistoryList) IsDone ¶

func (ahl *AgentHistoryList) IsDone() bool

func (*AgentHistoryList) IsSuccessful ¶

func (ahl *AgentHistoryList) IsSuccessful() *bool

func (*AgentHistoryList) LastResult ¶

func (ahl *AgentHistoryList) LastResult() *ActionResult

func (*AgentHistoryList) ModelDump ¶

func (ahl *AgentHistoryList) ModelDump() map[string]interface{}

func (*AgentHistoryList) TotalInputTokens ¶

func (ahl *AgentHistoryList) TotalInputTokens() int

type AgentMessagePrompt ¶

type AgentMessagePrompt struct {
	State             *browser.BrowserState
	Result            []*controller.ActionResult
	IncludeAttributes []string
	StepInfo          *AgentStepInfo
}

func NewAgentMessagePrompt ¶

func NewAgentMessagePrompt(
	state *browser.BrowserState,
	result []*controller.ActionResult,
	includeAttributes []string,
	stepInfo *AgentStepInfo,
) *AgentMessagePrompt

func (*AgentMessagePrompt) GetUserMessage ¶

func (amp *AgentMessagePrompt) GetUserMessage(useVision bool) *schema.Message

type AgentOption ¶

type AgentOption func(*AgentOptions)

func WithAgentSettings ¶

func WithAgentSettings(settings AgentSettingsConfig) AgentOption

func WithBrowser ¶

func WithBrowser(b *browser.Browser) AgentOption

func WithBrowserConfig ¶ added in v0.1.1

func WithBrowserConfig(b browser.BrowserConfig) AgentOption

func WithBrowserContext ¶

func WithBrowserContext(b *browser.BrowserContext) AgentOption

func WithController ¶

func WithController(c *controller.Controller) AgentOption

func WithInitialActions ¶

func WithInitialActions(actions []map[string]interface{}) AgentOption

func WithInjectedAgentState ¶

func WithInjectedAgentState(state *AgentState) AgentOption

func WithRegisterDoneCallback ¶

func WithRegisterDoneCallback(callback func(history *AgentHistoryList)) AgentOption

func WithRegisterExternalAgentStatusRaiseErrorCallback ¶

func WithRegisterExternalAgentStatusRaiseErrorCallback(callback func() bool) AgentOption

func WithRegisterNewStepCallback ¶

func WithRegisterNewStepCallback(callback func(state *browser.BrowserState, output *AgentOutput, n int)) AgentOption

func WithSensitiveData ¶

func WithSensitiveData(data map[string]string) AgentOption

func WithValidateLLM ¶ added in v0.1.2

func WithValidateLLM(validateLLM model.BaseChatModel) AgentOption

type AgentOptions ¶

type AgentOptions struct {
	ValidateLLM model.BaseChatModel
	// contains filtered or unexported fields
}

type AgentOutput ¶

type AgentOutput struct {
	CurrentState *AgentBrain            `json:"current_state"`
	Actions      []*controller.ActModel `json:"actions" jsonschema:"minItems=1"` // List of actions to execute
}

Output model for the agent. @dev note: this model is extended with custom actions in AgentService. You can also use some fields that are not in this model as provided by the linter, as long as they are registered in the DynamicActions model.

func (*AgentOutput) ToString ¶

func (ao *AgentOutput) ToString() string

type AgentRunOption ¶ added in v0.1.1

type AgentRunOption func(*agentRunOptions)

AgentRunOption defines a functional option for Agent.Run

func WithAutoClose ¶ added in v0.1.1

func WithAutoClose(autoClose bool) AgentRunOption

WithAutoClose sets whether to automatically close the browser after running the agent

func WithMaxSteps ¶ added in v0.1.1

func WithMaxSteps(n int) AgentRunOption

WithMaxSteps sets the maximum number of steps for Agent.Run

func WithOnStepEnd ¶ added in v0.1.1

func WithOnStepEnd(cb func(*Agent)) AgentRunOption

WithOnStepEnd sets a callback to be called at the end of each step

func WithOnStepStart ¶ added in v0.1.1

func WithOnStepStart(cb func(*Agent)) AgentRunOption

WithOnStepStart sets a callback to be called at the start of each step

type AgentSettings ¶

type AgentSettings struct {
	UseVision            bool    `json:"use_vision"`
	UseVisionForPlanner  bool    `json:"use_vision_for_planner"`
	SaveConversationPath *string `json:"save_conversation_path,omitempty"`
	MaxFailures          int     `json:"max_failures"`
	RetryDelay           int     `json:"retry_delay"`
	MaxInputTokens       int     `json:"max_input_tokens"`
	// ValidateOutput        bool                       `json:"validate_output"` replace to ValidateLLM
	MessageContext        *string                    `json:"message_context,omitempty"`
	GenerateGif           bool                       `json:"generate_gif"`
	AvailableFilePaths    []string                   `json:"available_file_paths"`
	OverrideSystemMessage *string                    `json:"override_system_message,omitempty"`
	ExtendSystemMessage   *string                    `json:"extend_system_message,omitempty"`
	IncludeAttributes     []string                   `json:"include_attributes"`
	MaxActionsPerStep     int                        `json:"max_actions_per_step"`
	ToolCallingMethod     *ToolCallingMethod         `json:"tool_calling_method,omitempty"`
	PageExtractionLLM     model.ToolCallingChatModel `json:"page_extraction_llm"`
	PlannerLLM            model.ToolCallingChatModel `json:"planner_llm"`
	PlannerInterval       int                        `json:"planner_interval"`
	IsPlannerReasoning    bool                       `json:"is_planner_reasoning"`

	// Procedural memory settings
	EnableMemory   bool                   `json:"enable_memory"`
	MemoryInterval int                    `json:"memory_interval"`
	MemoryConfig   map[string]interface{} `json:"memory_config"`
}

Options for the agent

func NewAgentSettings ¶

func NewAgentSettings(config AgentSettingsConfig) *AgentSettings

type AgentSettingsConfig ¶

type AgentSettingsConfig map[string]interface{}

type AgentState ¶

type AgentState struct {
	AgentId             string                     `json:"agent_id"`
	NSteps              int                        `json:"n_steps"`
	ConsecutiveFailures int                        `json:"consecutive_failures"`
	LastResult          []*controller.ActionResult `json:"last_result"`
	History             *AgentHistoryList          `json:"history"`
	LastPlan            *string                    `json:"last_plan,omitempty"`
	Paused              bool                       `json:"paused"`
	Stopped             bool                       `json:"stopped"`
	MessageManagerState *MessageManagerState       `json:"message_manager_state"`
}

Holds all state information for an Agent

func NewAgentState ¶

func NewAgentState() *AgentState

type AgentStepInfo ¶

type AgentStepInfo struct {
	StepNumber int
	MaxSteps   int
}

func (*AgentStepInfo) IsLastStep ¶

func (asi *AgentStepInfo) IsLastStep() bool

type BrowserStateHistory ¶

type BrowserStateHistory = browser.BrowserStateHistory

type CurrentState ¶

type CurrentState struct {
	EvaluationPreviousGoal string `json:"evaluation_previous_goal"`
	Memory                 string `json:"memory"`
	NextGoal               string `json:"next_goal"`
}

AgentBrain

type ManagedMessage ¶

type ManagedMessage struct {
	Message  *schema.Message  `json:"message"`
	Metadata *MessageMetadata `json:"metadata"`
}

type MessageHistory ¶

type MessageHistory struct {
	Messages      []ManagedMessage `json:"messages"`
	CurrentTokens int              `json:"current_tokens"`
}

func (*MessageHistory) AddMessage ¶

func (m *MessageHistory) AddMessage(message *schema.Message, metadata *MessageMetadata, position *int)

func (*MessageHistory) AddModelOutput ¶

func (m *MessageHistory) AddModelOutput(output *AgentOutput)

func (*MessageHistory) GetMessages ¶

func (m *MessageHistory) GetMessages() []*schema.Message

func (*MessageHistory) GetTotalTokens ¶

func (m *MessageHistory) GetTotalTokens() int

func (*MessageHistory) RemoveLastStateMessage ¶

func (m *MessageHistory) RemoveLastStateMessage()

func (*MessageHistory) RemoveOldestMessage ¶

func (m *MessageHistory) RemoveOldestMessage()

type MessageManager ¶

type MessageManager struct {
	Task         string
	SystemPrompt *schema.Message
	Settings     *MessageManagerSettings
	State        *MessageManagerState
}

func NewMessageManager ¶

func NewMessageManager(
	task string,
	systemPrompt *schema.Message,
	settings *MessageManagerSettings,
	state *MessageManagerState,
) *MessageManager

func (*MessageManager) AddMessageWithTokens ¶

func (m *MessageManager) AddMessageWithTokens(
	message *schema.Message,
	position *int,
	messageType *string,
)

func (*MessageManager) AddModelOutput ¶

func (m *MessageManager) AddModelOutput(output *AgentOutput)

func (*MessageManager) AddNewTask ¶

func (m *MessageManager) AddNewTask(newTask string)

func (*MessageManager) AddPlan ¶

func (m *MessageManager) AddPlan(plan *string, position *int) error

func (*MessageManager) AddStateMessage ¶

func (m *MessageManager) AddStateMessage(
	state *browser.BrowserState,
	result []*controller.ActionResult,
	stepInfo *AgentStepInfo,
	useVision bool,
)

func (*MessageManager) CutMessages ¶

func (m *MessageManager) CutMessages() error

func (*MessageManager) GetMessages ¶

func (m *MessageManager) GetMessages() []*schema.Message

func (*MessageManager) RemoveLastStateMessage ¶

func (m *MessageManager) RemoveLastStateMessage() error

func (*MessageManager) SaveConversation ¶

func (m *MessageManager) SaveConversation(
	inputMessages []*schema.Message,
	modelOutput *AgentOutput,
	target string,
) error

type MessageManagerConfig ¶

type MessageManagerConfig map[string]interface{}

type MessageManagerSettings ¶

type MessageManagerSettings struct {
	MaxInputTokens              int               `json:"max_input_tokens"`
	EstimatedCharactersPerToken int               `json:"estimated_characters_per_token"`
	ImageTokens                 int               `json:"image_tokens"`
	IncludeAttributes           []string          `json:"include_attributes"`
	MessageContext              *string           `json:"message_context,omitempty"`
	SensitiveData               map[string]string `json:"sensitive_data"`
	AvailableFilePaths          []string          `json:"available_file_paths"`
}

func NewMessageManagerSettings ¶

func NewMessageManagerSettings(config MessageManagerConfig) *MessageManagerSettings

type MessageManagerState ¶

type MessageManagerState struct {
	History *MessageHistory
	ToolId  int
}

func NewMessageManagerState ¶

func NewMessageManagerState() *MessageManagerState

type MessageMetadata ¶

type MessageMetadata struct {
	Tokens      int     `json:"tokens"`
	MessageType *string `json:"message_type,omitempty"`
}

type StepMetadata ¶

type StepMetadata struct {
	StepStartTime float64
	StepEndTime   float64
	InputTokens   int
	StepNumber    int
}

Metadata for a single step including timing and token information

func (*StepMetadata) DurationSeconds ¶

func (sm *StepMetadata) DurationSeconds() float64

Calculate step duration in seconds

type SystemPrompt ¶

type SystemPrompt struct {
	SystemMessage            *schema.Message
	DefaultActionDescription string
	MaxActionsPerStep        int
}

func NewSystemPrompt ¶

func NewSystemPrompt(
	actionDescription string,
	maxActionsPerStep int,
	overrideSystemMessage *string,
	extendSystemMessage *string,
) *SystemPrompt

type ToolCallingMethod ¶

type ToolCallingMethod string

const (
	FunctionCalling ToolCallingMethod = "function_calling"
	JSONMode        ToolCallingMethod = "json_mode"
	Raw             ToolCallingMethod = "raw"
	Auto            ToolCallingMethod = "auto"
)

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL