agent

package
v0.1.2 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jun 10, 2025 License: MIT Imports: 26 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
// REQUIRED_LLM_API_ENV_VARS maps a chat model type name (for example
// "ChatOpenAI") to a list of environment variable names. "ChatOllama" maps to
// an empty list.
// NOTE(review): presumably these are the variables that must be set before the
// matching model can be used — confirm against the code that reads this map.
var REQUIRED_LLM_API_ENV_VARS = map[string][]string{"ChatOpenAI": {"OPENAI_API_KEY"}, "AzureOpenAI": {"AZURE_ENDPOINT", "AZURE_OPENAI_API_KEY"}, "ChatBedrockConverse": {"ANTHROPIC_API_KEY"}, "ChatAnthropic": {"ANTHROPIC_API_KEY"}, "ChatGoogleGenerativeAI": {"GEMINI_API_KEY"}, "ChatDeepSeek": {"DEEPSEEK_API_KEY"}, "ChatOllama": {}, "ChatGrok": {"GROK_API_KEY"}}

REQUIRED_LLM_API_ENV_VARS is declared as a map[string][]string.

Functions

func GetInteractedElement

func GetInteractedElement(modelOutput *AgentOutput, selectorMap *dom.SelectorMap) []*dom.DOMHistoryElement

func ToolInfoWithCustomActions

func ToolInfoWithCustomActions(customActions *controller.ActionModel) *schema.ToolInfo

func ValidationOutputSchema added in v0.1.2

func ValidationOutputSchema() *openapi3.Schema

Types

type AIMessageArguments

// AIMessageArguments pairs a CurrentState with a list of actions, each action
// being a generic string-keyed object. It serializes under the JSON keys
// "current_state" and "actions".
// NOTE(review): presumably the argument payload of an assistant tool call —
// confirm against the message manager.
type AIMessageArguments struct {
	CurrentState CurrentState             `json:"current_state"`
	Actions      []map[string]interface{} `json:"actions"`
}

type ActionResult

// ActionResult is an alias for controller.ActionResult; both names denote the
// same type.
type ActionResult = controller.ActionResult

type Agent

// Agent groups the task, models, controller, browser handles, settings,
// state, callbacks and tool schemas used by one agent. It is created by
// NewAgent, executed with Run and released with Close.
// NOTE(review): the field notes below are inferred from names and types —
// confirm against NewAgent.
type Agent struct {
	Task                   string
	LLM                    model.ToolCallingChatModel
	Controller             *controller.Controller
	SensitiveData          map[string]string
	Settings               *AgentSettings
	State                  *AgentState
	// NOTE(review): the Injected* flags presumably record that Browser /
	// BrowserContext were supplied by the caller (WithBrowser,
	// WithBrowserContext) — confirm.
	InjectedBrowser        bool
	InjectedBrowserContext bool
	Browser                *browser.Browser
	BrowserContext         *browser.BrowserContext

	// Model-related fields.
	ValidateLLM             model.BaseChatModel
	ChatModelLibrary        string
	ModelName               string // e.g., openai, googleai, anthropic, huggingface
	PlannerModelName        string
	PageExtractionModelName string

	// Callbacks. The WithRegister* options accept functions with these same
	// signatures.
	RegisterNewStepCallback                       func(state *browser.BrowserState, output *AgentOutput, n int)
	RegisterDoneCallback                          func(history *AgentHistoryList)
	RegisterExternalAgentStatusRaiseErrorCallback func() bool

	// ToolCallingMethod is optional; it is omitted from JSON when nil.
	ToolCallingMethod *ToolCallingMethod `json:"tool_calling_method,omitempty"`

	// Action models and tool schemas, each in a regular and a "done" variant.
	ActionModel     *controller.ActionModel
	AgentOutput     *schema.ToolInfo
	DoneActionModel *controller.ActionModel
	DoneAgentOutput *schema.ToolInfo

	MessageManager *MessageManager

	UnfilteredActions string
	InitialActions    []*controller.ActModel
}

func NewAgent

func NewAgent(
	task string,
	llm model.ToolCallingChatModel,
	options ...AgentOption,

) *Agent

To provide custom configuration, pass agent settings to the NewAgent function, e.g.,

NewAgentSettings(AgentSettingsConfig{
	"use_vision": true,
	"use_vision_for_planner": true,
	"save_conversation_path": "./conversation.json",
	...
})

func (*Agent) Close

func (ag *Agent) Close()

Close all resources

func (*Agent) Run

func (ag *Agent) Run(opts ...AgentRunOption) (*AgentHistoryList, error)

Run executes the agent for up to maxSteps (default 10), using functional options for callbacks

type AgentBrain

// AgentBrain holds the current state of the agent as three strings: the
// evaluation of the previous goal, the memory, and the next goal.
type AgentBrain struct {
	EvaluationPreviousGoal string `json:"evaluation_previous_goal"`
	Memory                 string `json:"memory"`
	NextGoal               string `json:"next_goal"`
}

Current state of the agent

type AgentHistory

// AgentHistory is one history item for agent actions. It holds the model
// output, the action results, the browser state history entry and the step
// metadata.
type AgentHistory struct {
	ModelOutput *AgentOutput         `json:"model_output"`
	Result      []*ActionResult      `json:"result"`
	State       *BrowserStateHistory `json:"state"`
	Metadata    *StepMetadata        `json:"metadata"`
}

History item for agent actions

func (*AgentHistory) ModelDump

func (ah *AgentHistory) ModelDump() map[string]interface{}

type AgentHistoryList

// AgentHistoryList is a list of AgentHistory items, serialized under the JSON
// key "history". It is the value returned by Agent.Run.
type AgentHistoryList struct {
	History []*AgentHistory `json:"history"`
}

func (*AgentHistoryList) IsDone

func (ahl *AgentHistoryList) IsDone() bool

func (*AgentHistoryList) IsSuccessful

func (ahl *AgentHistoryList) IsSuccessful() *bool

func (*AgentHistoryList) LastResult

func (ahl *AgentHistoryList) LastResult() *ActionResult

func (*AgentHistoryList) ModelDump

func (ahl *AgentHistoryList) ModelDump() map[string]interface{}

func (*AgentHistoryList) TotalInputTokens

func (ahl *AgentHistoryList) TotalInputTokens() int

type AgentMessagePrompt

// AgentMessagePrompt holds a browser state, a list of action results, a list
// of attribute names and step info. NewAgentMessagePrompt takes these same
// four values.
// NOTE(review): presumably the inputs GetUserMessage uses to build the user
// message — confirm.
type AgentMessagePrompt struct {
	State             *browser.BrowserState
	Result            []*controller.ActionResult
	IncludeAttributes []string
	StepInfo          *AgentStepInfo
}

func NewAgentMessagePrompt

func NewAgentMessagePrompt(
	state *browser.BrowserState,
	result []*controller.ActionResult,
	includeAttributes []string,
	stepInfo *AgentStepInfo,
) *AgentMessagePrompt

func (*AgentMessagePrompt) GetUserMessage

func (amp *AgentMessagePrompt) GetUserMessage(useVision bool) *schema.Message

type AgentOption

// AgentOption is a functional option: a function that receives the
// *AgentOptions to modify. NewAgent accepts any number of them.
type AgentOption func(*AgentOptions)

func WithAgentSettings

func WithAgentSettings(settings AgentSettingsConfig) AgentOption

func WithBrowser

func WithBrowser(b *browser.Browser) AgentOption

func WithBrowserConfig added in v0.1.1

func WithBrowserConfig(b browser.BrowserConfig) AgentOption

func WithBrowserContext

func WithBrowserContext(b *browser.BrowserContext) AgentOption

func WithController

func WithController(c *controller.Controller) AgentOption

func WithInitialActions

func WithInitialActions(actions []map[string]interface{}) AgentOption

func WithInjectedAgentState

func WithInjectedAgentState(state *AgentState) AgentOption

func WithRegisterDoneCallback

func WithRegisterDoneCallback(callback func(history *AgentHistoryList)) AgentOption

func WithRegisterExternalAgentStatusRaiseErrorCallback

func WithRegisterExternalAgentStatusRaiseErrorCallback(callback func() bool) AgentOption

func WithRegisterNewStepCallback

func WithRegisterNewStepCallback(callback func(state *browser.BrowserState, output *AgentOutput, n int)) AgentOption

func WithSensitiveData

func WithSensitiveData(data map[string]string) AgentOption

func WithValidateLLM added in v0.1.2

func WithValidateLLM(validateLLM model.BaseChatModel) AgentOption

type AgentOptions

// AgentOptions is the value that AgentOption functions receive. ValidateLLM is
// its only exported field.
type AgentOptions struct {
	ValidateLLM model.BaseChatModel
	// contains filtered or unexported fields
}

type AgentOutput

// AgentOutput is the output model for the agent: the agent's current state
// plus the list of actions to execute. The jsonschema tag on Actions declares
// a minimum of one item.
type AgentOutput struct {
	CurrentState *AgentBrain            `json:"current_state"`
	Actions      []*controller.ActModel `json:"actions" jsonschema:"minItems=1"` // List of actions to execute
}

Output model for the agent. @dev note: this model is extended with custom actions in AgentService. You can also use some fields that are not in this model as provided by the linter, as long as they are registered in the DynamicActions model.

func (*AgentOutput) ToString

func (ao *AgentOutput) ToString() string

type AgentRunOption added in v0.1.1

// AgentRunOption defines a functional option for Agent.Run. It is a function
// that receives the unexported *agentRunOptions to modify.
type AgentRunOption func(*agentRunOptions)

AgentRunOption defines a functional option for Agent.Run

func WithAutoClose added in v0.1.1

func WithAutoClose(autoClose bool) AgentRunOption

WithAutoClose sets whether to automatically close the browser after running the agent

func WithMaxSteps added in v0.1.1

func WithMaxSteps(n int) AgentRunOption

WithMaxSteps sets the maximum number of steps for Agent.Run

func WithOnStepEnd added in v0.1.1

func WithOnStepEnd(cb func(*Agent)) AgentRunOption

WithOnStepEnd sets a callback to be called at the end of each step

func WithOnStepStart added in v0.1.1

func WithOnStepStart(cb func(*Agent)) AgentRunOption

WithOnStepStart sets a callback to be called at the start of each step

type AgentSettings

// AgentSettings holds the options for the agent. NewAgentSettings builds one
// from an AgentSettingsConfig.
// NOTE(review): units and defaults (e.g. for RetryDelay, PlannerInterval,
// MemoryInterval) are not visible here — confirm in NewAgentSettings.
type AgentSettings struct {
	UseVision            bool    `json:"use_vision"`
	UseVisionForPlanner  bool    `json:"use_vision_for_planner"`
	SaveConversationPath *string `json:"save_conversation_path,omitempty"`
	MaxFailures          int     `json:"max_failures"`
	RetryDelay           int     `json:"retry_delay"`
	MaxInputTokens       int     `json:"max_input_tokens"`
	// The former field ValidateOutput bool `json:"validate_output"` was replaced by ValidateLLM.
	MessageContext        *string                    `json:"message_context,omitempty"`
	GenerateGif           bool                       `json:"generate_gif"`
	AvailableFilePaths    []string                   `json:"available_file_paths"`
	OverrideSystemMessage *string                    `json:"override_system_message,omitempty"`
	ExtendSystemMessage   *string                    `json:"extend_system_message,omitempty"`
	IncludeAttributes     []string                   `json:"include_attributes"`
	MaxActionsPerStep     int                        `json:"max_actions_per_step"`
	ToolCallingMethod     *ToolCallingMethod         `json:"tool_calling_method,omitempty"`
	PageExtractionLLM     model.ToolCallingChatModel `json:"page_extraction_llm"`
	PlannerLLM            model.ToolCallingChatModel `json:"planner_llm"`
	PlannerInterval       int                        `json:"planner_interval"`
	IsPlannerReasoning    bool                       `json:"is_planner_reasoning"`

	// Procedural memory settings
	EnableMemory   bool                   `json:"enable_memory"`
	MemoryInterval int                    `json:"memory_interval"`
	MemoryConfig   map[string]interface{} `json:"memory_config"`
}

Options for the agent

func NewAgentSettings

func NewAgentSettings(config AgentSettingsConfig) *AgentSettings

type AgentSettingsConfig

// AgentSettingsConfig is a string-keyed map of setting values. It is the
// argument type of NewAgentSettings and WithAgentSettings.
// NOTE(review): keys presumably match the AgentSettings JSON names (such as
// "use_vision") — confirm in NewAgentSettings.
type AgentSettingsConfig map[string]interface{}

type AgentState

// AgentState holds all state information for an Agent: its ID, step and
// consecutive-failure counters, the last action results, the history, the
// last plan, the paused/stopped flags and the message manager state.
// LastPlan is optional and is omitted from JSON when nil.
type AgentState struct {
	AgentId             string                     `json:"agent_id"`
	NSteps              int                        `json:"n_steps"`
	ConsecutiveFailures int                        `json:"consecutive_failures"`
	LastResult          []*controller.ActionResult `json:"last_result"`
	History             *AgentHistoryList          `json:"history"`
	LastPlan            *string                    `json:"last_plan,omitempty"`
	Paused              bool                       `json:"paused"`
	Stopped             bool                       `json:"stopped"`
	MessageManagerState *MessageManagerState       `json:"message_manager_state"`
}

Holds all state information for an Agent

func NewAgentState

func NewAgentState() *AgentState

type AgentStepInfo

// AgentStepInfo holds a step number together with the maximum number of
// steps.
// NOTE(review): whether StepNumber is zero- or one-based is not visible here —
// confirm in IsLastStep.
type AgentStepInfo struct {
	StepNumber int
	MaxSteps   int
}

func (*AgentStepInfo) IsLastStep

func (asi *AgentStepInfo) IsLastStep() bool

type BrowserStateHistory

// BrowserStateHistory is an alias for browser.BrowserStateHistory; both names
// denote the same type.
type BrowserStateHistory = browser.BrowserStateHistory

type CurrentState

// CurrentState has the same three string fields and JSON keys as AgentBrain:
// the evaluation of the previous goal, the memory, and the next goal.
type CurrentState struct {
	EvaluationPreviousGoal string `json:"evaluation_previous_goal"`
	Memory                 string `json:"memory"`
	NextGoal               string `json:"next_goal"`
}

CurrentState has the same fields as AgentBrain.

type ManagedMessage

// ManagedMessage pairs a message with its MessageMetadata.
type ManagedMessage struct {
	Message  *schema.Message  `json:"message"`
	Metadata *MessageMetadata `json:"metadata"`
}

type MessageHistory

// MessageHistory holds a list of ManagedMessage values and a CurrentTokens
// count.
// NOTE(review): CurrentTokens is presumably the sum of the messages' token
// counts — confirm in AddMessage and the Remove* methods.
type MessageHistory struct {
	Messages      []ManagedMessage `json:"messages"`
	CurrentTokens int              `json:"current_tokens"`
}

func (*MessageHistory) AddMessage

func (m *MessageHistory) AddMessage(message *schema.Message, metadata *MessageMetadata, position *int)

func (*MessageHistory) AddModelOutput

func (m *MessageHistory) AddModelOutput(output *AgentOutput)

func (*MessageHistory) GetMessages

func (m *MessageHistory) GetMessages() []*schema.Message

func (*MessageHistory) GetTotalTokens

func (m *MessageHistory) GetTotalTokens() int

func (*MessageHistory) RemoveLastStateMessage

func (m *MessageHistory) RemoveLastStateMessage()

func (*MessageHistory) RemoveOldestMessage

func (m *MessageHistory) RemoveOldestMessage()

type MessageManager

// MessageManager holds the task text, the system prompt message, the manager
// settings and the manager state. NewMessageManager takes these same four
// values.
type MessageManager struct {
	Task         string
	SystemPrompt *schema.Message
	Settings     *MessageManagerSettings
	State        *MessageManagerState
}

func NewMessageManager

func NewMessageManager(
	task string,
	systemPrompt *schema.Message,
	settings *MessageManagerSettings,
	state *MessageManagerState,
) *MessageManager

func (*MessageManager) AddMessageWithTokens

func (m *MessageManager) AddMessageWithTokens(
	message *schema.Message,
	position *int,
	messageType *string,
)

func (*MessageManager) AddModelOutput

func (m *MessageManager) AddModelOutput(output *AgentOutput)

func (*MessageManager) AddNewTask

func (m *MessageManager) AddNewTask(newTask string)

func (*MessageManager) AddPlan

func (m *MessageManager) AddPlan(plan *string, position *int) error

func (*MessageManager) AddStateMessage

func (m *MessageManager) AddStateMessage(
	state *browser.BrowserState,
	result []*controller.ActionResult,
	stepInfo *AgentStepInfo,
	useVision bool,
)

func (*MessageManager) CutMessages

func (m *MessageManager) CutMessages() error

func (*MessageManager) GetMessages

func (m *MessageManager) GetMessages() []*schema.Message

func (*MessageManager) RemoveLastStateMessage

func (m *MessageManager) RemoveLastStateMessage() error

func (*MessageManager) SaveConversation

func (m *MessageManager) SaveConversation(
	inputMessages []*schema.Message,
	modelOutput *AgentOutput,
	target string,
) error

type MessageManagerConfig

// MessageManagerConfig is a string-keyed map of setting values. It is the
// argument type of NewMessageManagerSettings.
type MessageManagerConfig map[string]interface{}

type MessageManagerSettings

// MessageManagerSettings holds the message manager's configuration: token
// limits and estimates, attribute names to include, an optional message
// context, sensitive data and available file paths. NewMessageManagerSettings
// builds one from a MessageManagerConfig.
// NOTE(review): defaults are not visible here — confirm in
// NewMessageManagerSettings.
type MessageManagerSettings struct {
	MaxInputTokens              int               `json:"max_input_tokens"`
	EstimatedCharactersPerToken int               `json:"estimated_characters_per_token"`
	ImageTokens                 int               `json:"image_tokens"`
	IncludeAttributes           []string          `json:"include_attributes"`
	MessageContext              *string           `json:"message_context,omitempty"`
	SensitiveData               map[string]string `json:"sensitive_data"`
	AvailableFilePaths          []string          `json:"available_file_paths"`
}

func NewMessageManagerSettings

func NewMessageManagerSettings(config MessageManagerConfig) *MessageManagerSettings

type MessageManagerState

// MessageManagerState holds the message history and an integer ToolId. No JSON
// tags are declared on its fields.
// NOTE(review): ToolId is presumably a counter for tool-call IDs — confirm.
type MessageManagerState struct {
	History *MessageHistory
	ToolId  int
}

func NewMessageManagerState

func NewMessageManagerState() *MessageManagerState

type MessageMetadata

// MessageMetadata holds a token count and an optional message type.
// MessageType is omitted from JSON when nil.
type MessageMetadata struct {
	Tokens      int     `json:"tokens"`
	MessageType *string `json:"message_type,omitempty"`
}

type StepMetadata

// StepMetadata is the metadata for a single step, including timing and token
// information.
// NOTE(review): StepStartTime and StepEndTime are presumably timestamps in
// seconds, since DurationSeconds returns a float64 — confirm.
type StepMetadata struct {
	StepStartTime float64
	StepEndTime   float64
	InputTokens   int
	StepNumber    int
}

Metadata for a single step including timing and token information

func (*StepMetadata) DurationSeconds

func (sm *StepMetadata) DurationSeconds() float64

Calculate step duration in seconds

type SystemPrompt

// SystemPrompt holds a system message together with the default action
// description and the maximum number of actions per step. NewSystemPrompt
// takes an action description, a maximum, and optional override / extension
// strings.
type SystemPrompt struct {
	SystemMessage            *schema.Message
	DefaultActionDescription string
	MaxActionsPerStep        int
}

func NewSystemPrompt

func NewSystemPrompt(
	actionDescription string,
	maxActionsPerStep int,
	overrideSystemMessage *string,
	extendSystemMessage *string,
) *SystemPrompt

type ToolCallingMethod

// ToolCallingMethod is a string type naming a tool-calling method.
type ToolCallingMethod string
// The declared ToolCallingMethod values.
const (
	FunctionCalling ToolCallingMethod = "function_calling"
	JSONMode        ToolCallingMethod = "json_mode"
	Raw             ToolCallingMethod = "raw"
	Auto            ToolCallingMethod = "auto"
)

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL