Documentation
¶
Index ¶
- Variables
- func FormatSystemPrompt(cfg PromptConfig) (string, error)
- func GetAppName(packageName string) string
- func GetPackageName(appName string) string
- func GetSystemPromptTemplate(lang PromptLanguage) prompts.ChatPromptTemplate
- func GetUserPromptTemplate() prompts.ChatPromptTemplate
- func NewLoggingModel(inner llms.Model, meta LoggingMeta) llms.Model
- type AgentAdapter
- func (a *AgentAdapter) ExecuteCleanupSteps(ctx context.Context)
- func (a *AgentAdapter) GetInternalAgent() *LangChainAgent
- func (a *AgentAdapter) GetLog() *RunLog
- func (a *AgentAdapter) GetPlanSteps() []string
- func (a *AgentAdapter) PlanStepActions(ctx context.Context, step string) (*PlanStepActionsResult, error)
- func (a *AgentAdapter) PlanTask(ctx context.Context, task string) (*PlanTaskResult, error)
- func (a *AgentAdapter) Reset()
- func (a *AgentAdapter) Run(ctx context.Context, task string) (*uiagent.StepResult, error)
- func (a *AgentAdapter) SetPlanSteps(steps []string)
- func (a *AgentAdapter) SetTaskID(taskID string)
- func (a *AgentAdapter) Step(ctx context.Context, task string) (*uiagent.StepResult, error)
- type AgentConfig
- type Client
- type Config
- type ConversationMemory
- func (m *ConversationMemory) Clear(ctx context.Context) error
- func (m *ConversationMemory) GetBuffer() *memory.ConversationBuffer
- func (m *ConversationMemory) GetChatHistory(ctx context.Context) (string, error)
- func (m *ConversationMemory) LoadMemoryVariables(ctx context.Context) (map[string]interface{}, error)
- func (m *ConversationMemory) SaveContext(ctx context.Context, humanInput, aiOutput string) error
- type LLMInteractionRecord
- type LangChainAgent
- func (a *LangChainAgent) ExecuteCleanupSteps(ctx context.Context)
- func (a *LangChainAgent) GetLog() *RunLog
- func (a *LangChainAgent) GetPlanSteps() []string
- func (a *LangChainAgent) PlanStepActions(ctx context.Context, step string) (*PlanStepActionsResult, error)
- func (a *LangChainAgent) PlanTask(ctx context.Context, task string) (*PlanTaskResult, error)
- func (a *LangChainAgent) Reset()
- func (a *LangChainAgent) Run(ctx context.Context, task string) (*StepResult, error)
- func (a *LangChainAgent) SetPlanSteps(steps []string)
- func (a *LangChainAgent) SetTaskID(taskID string)
- func (a *LangChainAgent) Step(ctx context.Context, task string) (*StepResult, error)
- func (a *LangChainAgent) StepWithPlan(ctx context.Context, step string, stepIndex int) (*StepResult, error)
- type LoggingMeta
- type MemoryConfig
- type MemoryType
- type OperationRecord
- type ParsedAction
- type PlanStepActionsResult
- type PlanTaskResult
- type PlatformAdapter
- type PlatformCapturer
- type PlatformWithApps
- type PromptConfig
- type PromptLanguage
- type RunLog
- type RunnerAdapter
- func (r *RunnerAdapter) Back(ctx context.Context) error
- func (r *RunnerAdapter) ClearLastOutput()
- func (r *RunnerAdapter) DoubleTap(ctx context.Context, x, y int) error
- func (r *RunnerAdapter) Home(ctx context.Context) error
- func (r *RunnerAdapter) LastOutput() map[string]interface{}
- func (r *RunnerAdapter) Launch(ctx context.Context, packageName string) error
- func (r *RunnerAdapter) LongPress(ctx context.Context, x, y int, durationMs int) error
- func (r *RunnerAdapter) SetDeviceID(deviceID string)
- func (r *RunnerAdapter) Swipe(ctx context.Context, startX, startY, endX, endY int) error
- func (r *RunnerAdapter) Tap(ctx context.Context, x, y int) error
- func (r *RunnerAdapter) Type(ctx context.Context, text string) error
- func (r *RunnerAdapter) Wait(ctx context.Context, durationMs int) error
- type StepExecution
- type StepResult
- type StepRunner
- type UIAgent
Constants ¶
This section is empty.
Variables ¶
var AppPackages = map[string]string{
"微信": "com.tencent.mm",
"wechat": "com.tencent.mm",
"qq": "com.tencent.mobileqq",
"QQ": "com.tencent.mobileqq",
"微博": "com.sina.weibo",
"weibo": "com.sina.weibo",
"抖音": "com.ss.android.ugc.aweme",
"douyin": "com.ss.android.ugc.aweme",
"快手": "com.smile.gifmaker",
"kuaishou": "com.smile.gifmaker",
"小红书": "com.xingin.xhs",
"xiaohongshu": "com.xingin.xhs",
"bilibili": "tv.danmaku.bili",
"B站": "tv.danmaku.bili",
"telegram": "org.telegram.messenger",
"whatsapp": "com.whatsapp",
"facebook": "com.facebook.katana",
"instagram": "com.instagram.android",
"twitter": "com.twitter.android",
"x": "com.twitter.android",
"淘宝": "com.taobao.taobao",
"taobao": "com.taobao.taobao",
"京东": "com.jingdong.app.mall",
"jd": "com.jingdong.app.mall",
"拼多多": "com.xunmeng.pinduoduo",
"pinduoduo": "com.xunmeng.pinduoduo",
"amazon": "com.amazon.mShop.android.shopping",
"ebay": "com.ebay.mobile",
"美团": "com.sankuai.meituan",
"meituan": "com.sankuai.meituan",
"饿了么": "me.ele",
"eleme": "me.ele",
"大众点评": "com.dianping.v1",
"dianping": "com.dianping.v1",
"高德地图": "com.autonavi.minimap",
"gaode": "com.autonavi.minimap",
"百度地图": "com.baidu.BaiduMap",
"baidumap": "com.baidu.BaiduMap",
"googlemaps": "com.google.android.apps.maps",
"maps": "com.google.android.apps.maps",
"gmail": "com.google.android.gm",
"邮箱": "com.google.android.gm",
"chrome": "com.android.chrome",
"浏览器": "com.android.chrome",
"设置": "com.android.settings",
"settings": "com.android.settings",
"home": "com.miui.home",
"桌面": "com.miui.home",
"launcher": "com.miui.home",
}
AppPackages maps app display names (including both Chinese and English aliases) to Android package names.
Functions ¶
func FormatSystemPrompt ¶
func FormatSystemPrompt(cfg PromptConfig) (string, error)
FormatSystemPrompt formats the system prompt with the given configuration.
func GetAppName ¶
func GetAppName(packageName string) string
GetAppName returns the display name for a package name.
func GetPackageName ¶
func GetPackageName(appName string) string
GetPackageName returns the Android package name for an app display name.
func GetSystemPromptTemplate ¶
func GetSystemPromptTemplate(lang PromptLanguage) prompts.ChatPromptTemplate
GetSystemPromptTemplate returns a langchaingo ChatPromptTemplate for the system prompt.
func GetUserPromptTemplate ¶
func GetUserPromptTemplate() prompts.ChatPromptTemplate
GetUserPromptTemplate returns a langchaingo ChatPromptTemplate for user messages.
func NewLoggingModel ¶
func NewLoggingModel(inner llms.Model, meta LoggingMeta) llms.Model
Types ¶
type AgentAdapter ¶
type AgentAdapter struct {
// contains filtered or unexported fields
}
AgentAdapter adapts LangChainAgent to the existing uiagent interface. This allows the new langchaingo-based agent to be used with the existing service layer.
func NewAgentAdapter ¶
func NewAgentAdapter(cfg AgentConfig, runner tools.UIRunner, platform tools.Platform) (*AgentAdapter, error)
NewAgentAdapter creates an adapter wrapping a LangChainAgent.
func (*AgentAdapter) ExecuteCleanupSteps ¶
func (a *AgentAdapter) ExecuteCleanupSteps(ctx context.Context)
ExecuteCleanupSteps delegates to the underlying agent
func (*AgentAdapter) GetInternalAgent ¶
func (a *AgentAdapter) GetInternalAgent() *LangChainAgent
GetInternalAgent returns the underlying LangChainAgent.
func (*AgentAdapter) GetLog ¶
func (a *AgentAdapter) GetLog() *RunLog
GetLog delegates to the underlying agent
func (*AgentAdapter) GetPlanSteps ¶
func (a *AgentAdapter) GetPlanSteps() []string
GetPlanSteps delegates to the underlying agent
func (*AgentAdapter) PlanStepActions ¶
func (a *AgentAdapter) PlanStepActions(ctx context.Context, step string) (*PlanStepActionsResult, error)
PlanStepActions delegates to the underlying agent
func (*AgentAdapter) PlanTask ¶
func (a *AgentAdapter) PlanTask(ctx context.Context, task string) (*PlanTaskResult, error)
PlanTask delegates to the underlying agent
func (*AgentAdapter) Run ¶
func (a *AgentAdapter) Run(ctx context.Context, task string) (*uiagent.StepResult, error)
Run executes the full task and returns a result compatible with the existing uiagent interface.
func (*AgentAdapter) SetPlanSteps ¶
func (a *AgentAdapter) SetPlanSteps(steps []string)
SetPlanSteps delegates to the underlying agent
func (*AgentAdapter) SetTaskID ¶
func (a *AgentAdapter) SetTaskID(taskID string)
SetTaskID delegates to the underlying agent
func (*AgentAdapter) Step ¶
func (a *AgentAdapter) Step(ctx context.Context, task string) (*uiagent.StepResult, error)
Step executes a single step and returns a result compatible with the existing uiagent interface.
type AgentConfig ¶
type AgentConfig struct {
// LLM settings
BaseURL string
APIKey string
Model string
ReasoningEffort string // Reasoning effort level
// Agent settings
DeviceID string
Lang string // "zh" or "en"
MaxSteps int
Stream bool
SystemPrompt string // Custom system prompt (optional)
UseTaskPlanning bool // Whether to use task decomposition planning
// Callbacks for streaming
OnToken func(token string)
}
AgentConfig holds configuration for the LangChain Agent.
type Client ¶
type Client struct {
// contains filtered or unexported fields
}
Client wraps a langchaingo LLM model with additional functionality.
func NewClient ¶
NewClient creates a new LLM client using langchaingo's OpenAI implementation. It supports custom BaseURL for compatibility with various OpenAI-compatible APIs.
func (*Client) Call ¶
func (c *Client) Call(ctx context.Context, prompt string, options ...llms.CallOption) (string, error)
Call is a convenience method for calling the LLM with a simple prompt.
func (*Client) GenerateContent ¶
func (c *Client) GenerateContent(ctx context.Context, messages []llms.MessageContent, options ...llms.CallOption) (*llms.ContentResponse, error)
GenerateContent is a convenience method for generating content from the LLM.
type Config ¶
type Config struct {
BaseURL string // Custom API base URL (e.g., for proxies or alternative providers)
APIKey string // API key for authentication
Model string // Model name (e.g., "gpt-4-vision-preview")
}
Config holds the configuration for creating an LLM client.
type ConversationMemory ¶
type ConversationMemory struct {
// contains filtered or unexported fields
}
ConversationMemory wraps langchaingo's memory for conversation management.
func NewConversationMemory ¶
func NewConversationMemory(cfg MemoryConfig) *ConversationMemory
NewConversationMemory creates a new conversation memory instance.
func (*ConversationMemory) Clear ¶
func (m *ConversationMemory) Clear(ctx context.Context) error
Clear clears all conversation history.
func (*ConversationMemory) GetBuffer ¶
func (m *ConversationMemory) GetBuffer() *memory.ConversationBuffer
GetBuffer returns the underlying conversation buffer.
func (*ConversationMemory) GetChatHistory ¶
func (m *ConversationMemory) GetChatHistory(ctx context.Context) (string, error)
GetChatHistory returns the chat history as a formatted string.
func (*ConversationMemory) LoadMemoryVariables ¶
func (m *ConversationMemory) LoadMemoryVariables(ctx context.Context) (map[string]interface{}, error)
LoadMemoryVariables loads the conversation history.
func (*ConversationMemory) SaveContext ¶
func (m *ConversationMemory) SaveContext(ctx context.Context, humanInput, aiOutput string) error
SaveContext saves a human-AI interaction to memory.
type LLMInteractionRecord ¶
type LLMInteractionRecord struct {
TaskID string `json:"task_id"`
StepIndex int `json:"step_index"` // -1 for PlanTask, >=0 for step actions
RequestData map[string]interface{} `json:"request_data"`
ResponseData map[string]interface{} `json:"response_data"`
LatencyMs int64 `json:"latency_ms"`
Success bool `json:"success"`
Error string `json:"error,omitempty"`
}
LLMInteractionRecord records a single LLM interaction for database storage.
type LangChainAgent ¶
type LangChainAgent struct {
// contains filtered or unexported fields
}
LangChainAgent implements UIAgent using langchaingo.
func NewLangChainAgent ¶
func NewLangChainAgent(cfg AgentConfig, runner tools.UIRunner, platform tools.Platform) (*LangChainAgent, error)
NewLangChainAgent creates a new LangChain-based agent.
func (*LangChainAgent) ExecuteCleanupSteps ¶
func (a *LangChainAgent) ExecuteCleanupSteps(ctx context.Context)
ExecuteCleanupSteps performs cleanup actions when the task finishes: (1) exit the current app (recorded as a step for replay); (2) restore the ADB keyboard to the original IME. This should be called by the service layer after the entire task completes.
func (*LangChainAgent) GetLog ¶
func (a *LangChainAgent) GetLog() *RunLog
GetLog returns the execution log.
func (*LangChainAgent) GetPlanSteps ¶
func (a *LangChainAgent) GetPlanSteps() []string
GetPlanSteps returns the planned task steps
func (*LangChainAgent) PlanStepActions ¶
func (a *LangChainAgent) PlanStepActions(ctx context.Context, step string) (*PlanStepActionsResult, error)
PlanStepActions converts a single step into executable UI actions
func (*LangChainAgent) PlanTask ¶
func (a *LangChainAgent) PlanTask(ctx context.Context, task string) (*PlanTaskResult, error)
PlanTask breaks down a task into steps using LLM
func (*LangChainAgent) Run ¶
func (a *LangChainAgent) Run(ctx context.Context, task string) (*StepResult, error)
Run executes the full task until completion or max steps using task decomposition.
func (*LangChainAgent) SetPlanSteps ¶
func (a *LangChainAgent) SetPlanSteps(steps []string)
SetPlanSteps sets the planned task steps
func (*LangChainAgent) SetTaskID ¶
func (a *LangChainAgent) SetTaskID(taskID string)
SetTaskID sets the current task ID for interaction records
func (*LangChainAgent) Step ¶
func (a *LangChainAgent) Step(ctx context.Context, task string) (*StepResult, error)
Step executes a single agent step by directly calling the LLM.
func (*LangChainAgent) StepWithPlan ¶
func (a *LangChainAgent) StepWithPlan(ctx context.Context, step string, stepIndex int) (*StepResult, error)
StepWithPlan executes a single planned step with enhanced context by directly calling the LLM.
type LoggingMeta ¶
type MemoryConfig ¶
type MemoryConfig struct {
Type MemoryType // Type of memory to use
MaxHistory int // Maximum number of messages to keep (for window type)
}
MemoryConfig holds the configuration for conversation memory.
func DefaultMemoryConfig ¶
func DefaultMemoryConfig() MemoryConfig
DefaultMemoryConfig returns the default memory configuration.
type MemoryType ¶
type MemoryType string
MemoryType represents the type of conversation memory.
const (
	MemoryTypeBuffer  MemoryType = "buffer"  // Full conversation buffer
	MemoryTypeWindow  MemoryType = "window"  // Sliding window of recent messages
	MemoryTypeSummary MemoryType = "summary" // Summarized conversation history
)
type OperationRecord ¶
type OperationRecord struct {
StepIndex int
Thinking string
Action string
Result string
Timestamp time.Time
}
OperationRecord records a single operation in history.
type ParsedAction ¶
ParsedAction represents a parsed action from the agent.
type PlanStepActionsResult ¶
type PlanStepActionsResult struct {
Actions []string `json:"actions"` // Planned actions for the step
Interaction *LLMInteractionRecord `json:"-"` // LLM interaction record for database storage
}
PlanStepActionsResult contains the result of step action planning
type PlanTaskResult ¶
type PlanTaskResult struct {
Steps []string `json:"steps"` // Planned task steps
Interaction *LLMInteractionRecord `json:"-"` // LLM interaction record for database storage
}
PlanTaskResult contains the result of task planning
type PlatformAdapter ¶
type PlatformAdapter struct {
// contains filtered or unexported fields
}
PlatformAdapter adapts an existing platform to the tools.Platform interface.
func NewPlatformAdapter ¶
func NewPlatformAdapter(p PlatformCapturer) *PlatformAdapter
NewPlatformAdapter creates a platform adapter.
func (*PlatformAdapter) CaptureState ¶
func (p *PlatformAdapter) CaptureState(ctx context.Context) (*tools.ScreenState, error)
CaptureState captures the current screen state.
func (*PlatformAdapter) DeviceID ¶
func (p *PlatformAdapter) DeviceID() string
DeviceID returns the device identifier.
func (*PlatformAdapter) GetInstalledApps ¶
GetInstalledApps returns a map of app name to package name.
func (*PlatformAdapter) SetDeviceID ¶
func (p *PlatformAdapter) SetDeviceID(deviceID string)
SetDeviceID sets the device ID for the platform.
type PlatformCapturer ¶
type PlatformCapturer interface {
CaptureState(ctx context.Context) (uiagent.ScreenInfo, error)
}
PlatformCapturer is the interface for platforms that can capture screen state.
type PlatformWithApps ¶
type PlatformWithApps interface {
GetInstalledApps(ctx context.Context) (map[string]string, error)
}
PlatformWithApps is an optional interface for platforms that can list installed apps.
type PromptConfig ¶
type PromptConfig struct {
Language PromptLanguage
OSVersion string
DeviceModel string
}
PromptConfig holds the configuration for prompt generation.
type PromptLanguage ¶
type PromptLanguage string
PromptLanguage represents the language for prompts.
const (
	PromptLangChinese PromptLanguage = "zh"
	PromptLangEnglish PromptLanguage = "en"
)
type RunLog ¶
type RunLog struct {
Task string
DeviceID string
Steps []StepExecution
Variables map[string]interface{}
}
RunLog records the execution history.
type RunnerAdapter ¶
type RunnerAdapter struct {
// contains filtered or unexported fields
}
RunnerAdapter adapts an existing UIRunner (RunStep-based) to the tools.UIRunner interface.
func NewRunnerAdapter ¶
func NewRunnerAdapter(r StepRunner) *RunnerAdapter
NewRunnerAdapter creates a runner adapter.
func (*RunnerAdapter) Back ¶
func (r *RunnerAdapter) Back(ctx context.Context) error
Back implements tools.UIRunner
func (*RunnerAdapter) ClearLastOutput ¶
func (r *RunnerAdapter) ClearLastOutput()
ClearLastOutput clears cached output from previous action.
func (*RunnerAdapter) DoubleTap ¶
func (r *RunnerAdapter) DoubleTap(ctx context.Context, x, y int) error
DoubleTap implements tools.UIRunner
func (*RunnerAdapter) Home ¶
func (r *RunnerAdapter) Home(ctx context.Context) error
Home implements tools.UIRunner
func (*RunnerAdapter) LastOutput ¶
func (r *RunnerAdapter) LastOutput() map[string]interface{}
LastOutput returns the cached output from the most recent RunStep execution.
func (*RunnerAdapter) Launch ¶
func (r *RunnerAdapter) Launch(ctx context.Context, packageName string) error
Launch implements tools.UIRunner
func (*RunnerAdapter) SetDeviceID ¶
func (r *RunnerAdapter) SetDeviceID(deviceID string)
SetDeviceID sets the device ID for the runner.
func (*RunnerAdapter) Swipe ¶
func (r *RunnerAdapter) Swipe(ctx context.Context, startX, startY, endX, endY int) error
Swipe implements tools.UIRunner
func (*RunnerAdapter) Tap ¶
func (r *RunnerAdapter) Tap(ctx context.Context, x, y int) error
Tap implements tools.UIRunner
type StepExecution ¶
type StepExecution struct {
Index int
Action string
Result uiagent.StepExecResult
Timestamp time.Time
}
StepExecution records a single step execution.
type StepResult ¶
type StepResult struct {
Success bool
Finished bool
Thinking string
Message string
Action *ParsedAction
Log *RunLog
Err error
}
StepResult represents the result of a single agent step.
type StepRunner ¶
type StepRunner interface {
RunStep(ctx context.Context, step uiagent.ActionStep) (uiagent.StepExecResult, error)
}
StepRunner is the interface for runners that execute ActionSteps.
type UIAgent ¶
type UIAgent interface {
Run(ctx context.Context, task string) (*StepResult, error)
Step(ctx context.Context, task string) (*StepResult, error)
PlanTask(ctx context.Context, task string) (*PlanTaskResult, error)
PlanStepActions(ctx context.Context, step string) (*PlanStepActionsResult, error)
Reset()
GetLog() *RunLog
SetTaskID(taskID string)
GetPlanSteps() []string
SetPlanSteps(steps []string)
}
UIAgent defines the interface for UI automation agent.