llm

package
v0.0.0-...-b31a014 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 19, 2026 License: Apache-2.0 Imports: 25 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
// AppPackages maps app display names to Android package names.
//
// Most apps are listed under both a Chinese name and a romanized/English
// alias, so several keys resolve to the same package (the mapping is
// many-to-one). Keys are stored exactly as written; note that "qq" and "QQ"
// are listed as separate entries.
// NOTE(review): a reverse lookup (package -> name) over this map has more than
// one candidate key per package — confirm which alias callers expect.
var AppPackages = map[string]string{

	// Social, messaging, and video apps.
	"微信":          "com.tencent.mm",
	"wechat":      "com.tencent.mm",
	"qq":          "com.tencent.mobileqq",
	"QQ":          "com.tencent.mobileqq",
	"微博":          "com.sina.weibo",
	"weibo":       "com.sina.weibo",
	"抖音":          "com.ss.android.ugc.aweme",
	"douyin":      "com.ss.android.ugc.aweme",
	"快手":          "com.smile.gifmaker",
	"kuaishou":    "com.smile.gifmaker",
	"小红书":         "com.xingin.xhs",
	"xiaohongshu": "com.xingin.xhs",
	"bilibili":    "tv.danmaku.bili",
	"B站":          "tv.danmaku.bili",
	"telegram":    "org.telegram.messenger",
	"whatsapp":    "com.whatsapp",
	"facebook":    "com.facebook.katana",
	"instagram":   "com.instagram.android",
	"twitter":     "com.twitter.android",
	"x":           "com.twitter.android",

	// Shopping apps.
	"淘宝":        "com.taobao.taobao",
	"taobao":    "com.taobao.taobao",
	"京东":        "com.jingdong.app.mall",
	"jd":        "com.jingdong.app.mall",
	"拼多多":       "com.xunmeng.pinduoduo",
	"pinduoduo": "com.xunmeng.pinduoduo",
	"amazon":    "com.amazon.mShop.android.shopping",
	"ebay":      "com.ebay.mobile",

	// Food delivery and local-services apps.
	"美团":       "com.sankuai.meituan",
	"meituan":  "com.sankuai.meituan",
	"饿了么":      "me.ele",
	"eleme":    "me.ele",
	"大众点评":     "com.dianping.v1",
	"dianping": "com.dianping.v1",

	// Map and navigation apps. The generic key "maps" resolves to Google Maps.
	"高德地图":       "com.autonavi.minimap",
	"gaode":      "com.autonavi.minimap",
	"百度地图":       "com.baidu.BaiduMap",
	"baidumap":   "com.baidu.BaiduMap",
	"googlemaps": "com.google.android.apps.maps",
	"maps":       "com.google.android.apps.maps",

	// Mail, browser, and system settings. The generic Chinese keys "邮箱"
	// (mailbox) and "浏览器" (browser) resolve to Gmail and Chrome specifically.
	"gmail":    "com.google.android.gm",
	"邮箱":       "com.google.android.gm",
	"chrome":   "com.android.chrome",
	"浏览器":      "com.android.chrome",
	"设置":       "com.android.settings",
	"settings": "com.android.settings",

	// Home screen / launcher aliases, all pointing at com.miui.home.
	// NOTE(review): this hard-codes a single vendor's launcher package;
	// presumably devices with a different launcher are not covered — confirm.
	"home":     "com.miui.home",
	"桌面":       "com.miui.home",
	"launcher": "com.miui.home",
}

AppPackages maps app display names (Chinese and English aliases) to Android package names.

Functions

func FormatSystemPrompt

func FormatSystemPrompt(cfg PromptConfig) (string, error)

FormatSystemPrompt formats the system prompt with the given configuration.

func GetAppName

func GetAppName(packageName string) string

GetAppName returns the display name for a package name

func GetPackageName

func GetPackageName(appName string) string

GetPackageName returns the Android package name for an app display name

func GetSystemPromptTemplate

func GetSystemPromptTemplate(lang PromptLanguage) prompts.ChatPromptTemplate

GetSystemPromptTemplate returns a langchaingo ChatPromptTemplate for the system prompt.

func GetUserPromptTemplate

func GetUserPromptTemplate() prompts.ChatPromptTemplate

GetUserPromptTemplate returns a langchaingo ChatPromptTemplate for user messages.

func NewLoggingModel

func NewLoggingModel(inner llms.Model, meta LoggingMeta) llms.Model

Types

type AgentAdapter

type AgentAdapter struct {
	// contains filtered or unexported fields
}

AgentAdapter adapts LangChainAgent to the existing uiagent interface. This allows the new langchaingo-based agent to be used with the existing service layer.

func NewAgentAdapter

func NewAgentAdapter(cfg AgentConfig, runner tools.UIRunner, platform tools.Platform) (*AgentAdapter, error)

NewAgentAdapter creates an adapter wrapping a LangChainAgent.

func (*AgentAdapter) ExecuteCleanupSteps

func (a *AgentAdapter) ExecuteCleanupSteps(ctx context.Context)

ExecuteCleanupSteps delegates to the underlying agent

func (*AgentAdapter) GetInternalAgent

func (a *AgentAdapter) GetInternalAgent() *LangChainAgent

GetInternalAgent returns the underlying LangChainAgent.

func (*AgentAdapter) GetLog

func (a *AgentAdapter) GetLog() *RunLog

GetLog delegates to the underlying agent

func (*AgentAdapter) GetPlanSteps

func (a *AgentAdapter) GetPlanSteps() []string

GetPlanSteps delegates to the underlying agent

func (*AgentAdapter) PlanStepActions

func (a *AgentAdapter) PlanStepActions(ctx context.Context, step string) (*PlanStepActionsResult, error)

PlanStepActions delegates to the underlying agent

func (*AgentAdapter) PlanTask

func (a *AgentAdapter) PlanTask(ctx context.Context, task string) (*PlanTaskResult, error)

PlanTask delegates to the underlying agent

func (*AgentAdapter) Reset

func (a *AgentAdapter) Reset()

Reset delegates to the underlying agent

func (*AgentAdapter) Run

func (a *AgentAdapter) Run(ctx context.Context, task string) (*uiagent.StepResult, error)

Run executes the full task and returns a result compatible with the existing interface.

func (*AgentAdapter) SetPlanSteps

func (a *AgentAdapter) SetPlanSteps(steps []string)

SetPlanSteps delegates to the underlying agent

func (*AgentAdapter) SetTaskID

func (a *AgentAdapter) SetTaskID(taskID string)

SetTaskID delegates to the underlying agent

func (*AgentAdapter) Step

func (a *AgentAdapter) Step(ctx context.Context, task string) (*uiagent.StepResult, error)

Step executes a single step and returns a result compatible with the existing interface.

type AgentConfig

// AgentConfig holds configuration for the LangChain Agent.
type AgentConfig struct {
	// LLM settings
	// NOTE(review): BaseURL, APIKey, and Model presumably carry the same
	// meaning as the same-named fields of Config (API base URL, API key,
	// model name) — confirm.
	BaseURL         string
	APIKey          string
	Model           string
	ReasoningEffort string // Reasoning effort level

	// Agent settings
	DeviceID        string
	Lang            string // "zh" or "en"
	// NOTE(review): presumably the step budget at which Run stops — confirm.
	MaxSteps        int
	Stream          bool
	SystemPrompt    string // Custom system prompt (optional)
	UseTaskPlanning bool   // Whether to use task decomposition planning

	// Callbacks for streaming
	// OnToken receives one token string per call.
	// NOTE(review): presumably only invoked when Stream is true — confirm.
	OnToken func(token string)
}

AgentConfig holds configuration for the LangChain Agent.

type Client

type Client struct {
	// contains filtered or unexported fields
}

Client wraps a langchaingo LLM model with additional functionality.

func NewClient

func NewClient(cfg Config) (*Client, error)

NewClient creates a new LLM client using langchaingo's OpenAI implementation. It supports custom BaseURL for compatibility with various OpenAI-compatible APIs.

func (*Client) Call

func (c *Client) Call(ctx context.Context, prompt string, options ...llms.CallOption) (string, error)

Call is a convenience method for calling the LLM with a simple prompt.

func (*Client) GenerateContent

func (c *Client) GenerateContent(ctx context.Context, messages []llms.MessageContent, options ...llms.CallOption) (*llms.ContentResponse, error)

GenerateContent is a convenience method for generating content from the LLM.

func (*Client) GetConfig

func (c *Client) GetConfig() Config

GetConfig returns the client configuration.

func (*Client) GetLLM

func (c *Client) GetLLM() llms.Model

GetLLM returns the underlying langchaingo LLM model.

type Config

// Config holds the configuration for creating an LLM client.
type Config struct {
	BaseURL string // Custom API base URL (e.g., for proxies or alternative providers)
	APIKey  string // API key for authentication
	Model   string // Model name (e.g., "gpt-4-vision-preview")
}

Config holds the configuration for creating an LLM client.

type ConversationMemory

type ConversationMemory struct {
	// contains filtered or unexported fields
}

ConversationMemory wraps langchaingo's memory for conversation management.

func NewConversationMemory

func NewConversationMemory(cfg MemoryConfig) *ConversationMemory

NewConversationMemory creates a new conversation memory instance.

func (*ConversationMemory) Clear

func (m *ConversationMemory) Clear(ctx context.Context) error

Clear clears all conversation history.

func (*ConversationMemory) GetBuffer

GetBuffer returns the underlying conversation buffer.

func (*ConversationMemory) GetChatHistory

func (m *ConversationMemory) GetChatHistory(ctx context.Context) (string, error)

GetChatHistory returns the chat history as a formatted string.

func (*ConversationMemory) LoadMemoryVariables

func (m *ConversationMemory) LoadMemoryVariables(ctx context.Context) (map[string]interface{}, error)

LoadMemoryVariables loads the conversation history.

func (*ConversationMemory) SaveContext

func (m *ConversationMemory) SaveContext(ctx context.Context, humanInput, aiOutput string) error

SaveContext saves a human-AI interaction to memory.

type LLMInteractionRecord

// LLMInteractionRecord records an LLM interaction for database storage.
// All fields are JSON-encoded under snake_case keys.
type LLMInteractionRecord struct {
	TaskID       string                 `json:"task_id"`
	StepIndex    int                    `json:"step_index"` // -1 for PlanTask, >=0 for step actions
	RequestData  map[string]interface{} `json:"request_data"`
	ResponseData map[string]interface{} `json:"response_data"`
	// NOTE(review): presumably the request latency in milliseconds, per the
	// field name — confirm.
	LatencyMs    int64                  `json:"latency_ms"`
	Success      bool                   `json:"success"`
	// Error is omitted from the JSON output when empty (omitempty).
	Error        string                 `json:"error,omitempty"`
}

LLMInteractionRecord records an LLM interaction for database storage.

type LangChainAgent

type LangChainAgent struct {
	// contains filtered or unexported fields
}

LangChainAgent implements UIAgent using langchaingo.

func NewLangChainAgent

func NewLangChainAgent(cfg AgentConfig, runner tools.UIRunner, platform tools.Platform) (*LangChainAgent, error)

NewLangChainAgent creates a new LangChain-based agent.

func (*LangChainAgent) ExecuteCleanupSteps

func (a *LangChainAgent) ExecuteCleanupSteps(ctx context.Context)

ExecuteCleanupSteps performs cleanup actions when the task finishes: (1) exit the current app (recorded as a step for replay); (2) restore the ADB keyboard to the original IME. This should be called by the service layer after the entire task completes.

func (*LangChainAgent) GetLog

func (a *LangChainAgent) GetLog() *RunLog

GetLog returns the execution log.

func (*LangChainAgent) GetPlanSteps

func (a *LangChainAgent) GetPlanSteps() []string

GetPlanSteps returns the planned task steps

func (*LangChainAgent) PlanStepActions

func (a *LangChainAgent) PlanStepActions(ctx context.Context, step string) (*PlanStepActionsResult, error)

PlanStepActions converts a single step into executable UI actions

func (*LangChainAgent) PlanTask

func (a *LangChainAgent) PlanTask(ctx context.Context, task string) (*PlanTaskResult, error)

PlanTask breaks down a task into steps using LLM

func (*LangChainAgent) Reset

func (a *LangChainAgent) Reset()

Reset clears the agent state.

func (*LangChainAgent) Run

func (a *LangChainAgent) Run(ctx context.Context, task string) (*StepResult, error)

Run executes the full task until completion or max steps using task decomposition.

func (*LangChainAgent) SetPlanSteps

func (a *LangChainAgent) SetPlanSteps(steps []string)

SetPlanSteps sets the planned task steps

func (*LangChainAgent) SetTaskID

func (a *LangChainAgent) SetTaskID(taskID string)

SetTaskID sets the current task ID for interaction records

func (*LangChainAgent) Step

func (a *LangChainAgent) Step(ctx context.Context, task string) (*StepResult, error)

Step executes a single agent step by directly calling the LLM.

func (*LangChainAgent) StepWithPlan

func (a *LangChainAgent) StepWithPlan(ctx context.Context, step string, stepIndex int) (*StepResult, error)

StepWithPlan executes a single planned step with enhanced context by directly calling the LLM.

type LoggingMeta

// LoggingMeta is the metadata argument accepted by NewLoggingModel.
// NOTE(review): presumably BaseURL and Model identify the endpoint and model
// in the wrapper's log output — confirm against NewLoggingModel.
type LoggingMeta struct {
	BaseURL string
	Model   string
}

type MemoryConfig

// MemoryConfig holds the configuration for conversation memory.
type MemoryConfig struct {
	Type       MemoryType // Type of memory to use
	MaxHistory int        // Maximum number of messages to keep (for window type)
}

MemoryConfig holds the configuration for conversation memory.

func DefaultMemoryConfig

func DefaultMemoryConfig() MemoryConfig

DefaultMemoryConfig returns the default memory configuration.

type MemoryType

type MemoryType string

MemoryType represents the type of conversation memory.

// Supported MemoryType values.
const (
	MemoryTypeBuffer  MemoryType = "buffer"  // Full conversation buffer
	MemoryTypeWindow  MemoryType = "window"  // Sliding window of recent messages
	MemoryTypeSummary MemoryType = "summary" // Summarized conversation history
)

type OperationRecord

// OperationRecord records a single operation in history.
// NOTE(review): Thinking, Action, and Result presumably hold the model's
// reasoning text, the action taken, and its outcome for step StepIndex —
// confirm against the code that populates this type.
type OperationRecord struct {
	StepIndex int
	Thinking  string
	Action    string
	Result    string
	Timestamp time.Time
}

OperationRecord records a single operation in history.

type ParsedAction

// ParsedAction represents a parsed action from the agent.
// NOTE(review): presumably Name is the action name, Args its named arguments,
// and Raw the unparsed source text — confirm against the parser.
type ParsedAction struct {
	Name string
	Args map[string]interface{}
	Raw  string
}

ParsedAction represents a parsed action from the agent.

type PlanStepActionsResult

// PlanStepActionsResult contains the result of step action planning.
// Interaction is tagged `json:"-"`, so it is excluded from JSON encoding.
type PlanStepActionsResult struct {
	Actions     []string              `json:"actions"` // Planned actions for the step
	Interaction *LLMInteractionRecord `json:"-"`       // LLM interaction record for database storage
}

PlanStepActionsResult contains the result of step action planning

type PlanTaskResult

// PlanTaskResult contains the result of task planning.
// Interaction is tagged `json:"-"`, so it is excluded from JSON encoding.
type PlanTaskResult struct {
	Steps       []string              `json:"steps"` // Planned task steps
	Interaction *LLMInteractionRecord `json:"-"`     // LLM interaction record for database storage
}

PlanTaskResult contains the result of task planning

type PlatformAdapter

type PlatformAdapter struct {
	// contains filtered or unexported fields
}

PlatformAdapter adapts existing platform to tools.Platform interface.

func NewPlatformAdapter

func NewPlatformAdapter(p PlatformCapturer) *PlatformAdapter

NewPlatformAdapter creates a platform adapter.

func (*PlatformAdapter) CaptureState

func (p *PlatformAdapter) CaptureState(ctx context.Context) (*tools.ScreenState, error)

CaptureState captures the current screen state.

func (*PlatformAdapter) DeviceID

func (p *PlatformAdapter) DeviceID() string

DeviceID returns the device identifier.

func (*PlatformAdapter) GetInstalledApps

func (p *PlatformAdapter) GetInstalledApps(ctx context.Context) (map[string]string, error)

GetInstalledApps returns a map of app name to package name.

func (*PlatformAdapter) SetDeviceID

func (p *PlatformAdapter) SetDeviceID(deviceID string)

SetDeviceID sets the device ID for the platform.

type PlatformCapturer

// PlatformCapturer is the interface for platforms that can capture screen state.
type PlatformCapturer interface {
	// CaptureState returns the current screen state as a uiagent.ScreenInfo.
	CaptureState(ctx context.Context) (uiagent.ScreenInfo, error)
}

PlatformCapturer is the interface for platforms that can capture screen state.

type PlatformWithApps

// PlatformWithApps is an optional interface for platforms that can list installed apps.
type PlatformWithApps interface {
	// GetInstalledApps lists the installed apps.
	// NOTE(review): presumably keyed app name -> package name, matching the
	// documented behavior of PlatformAdapter.GetInstalledApps — confirm.
	GetInstalledApps(ctx context.Context) (map[string]string, error)
}

PlatformWithApps is an optional interface for platforms that can list installed apps.

type PromptConfig

// PromptConfig holds the configuration for prompt generation.
type PromptConfig struct {
	// Language selects the prompt language; the package defines
	// PromptLangChinese ("zh") and PromptLangEnglish ("en").
	Language    PromptLanguage
	// NOTE(review): OSVersion and DeviceModel are presumably substituted into
	// the system prompt by FormatSystemPrompt — confirm.
	OSVersion   string
	DeviceModel string
}

PromptConfig holds the configuration for prompt generation.

type PromptLanguage

type PromptLanguage string

PromptLanguage represents the language for prompts.

// Supported PromptLanguage values.
const (
	PromptLangChinese PromptLanguage = "zh"
	PromptLangEnglish PromptLanguage = "en"
)

type RunLog

// RunLog records the execution history.
type RunLog struct {
	Task      string
	DeviceID  string
	// Steps holds one StepExecution per recorded step.
	Steps     []StepExecution
	// NOTE(review): the contents of Variables are not visible here; presumably
	// values captured during the run — confirm.
	Variables map[string]interface{}
}

RunLog records the execution history.

type RunnerAdapter

type RunnerAdapter struct {
	// contains filtered or unexported fields
}

RunnerAdapter adapts existing UIRunner (RunStep-based) to tools.UIRunner interface.

func NewRunnerAdapter

func NewRunnerAdapter(r StepRunner) *RunnerAdapter

NewRunnerAdapter creates a runner adapter.

func (*RunnerAdapter) Back

func (r *RunnerAdapter) Back(ctx context.Context) error

Back implements tools.UIRunner

func (*RunnerAdapter) ClearLastOutput

func (r *RunnerAdapter) ClearLastOutput()

ClearLastOutput clears the cached output from the previous action.

func (*RunnerAdapter) DoubleTap

func (r *RunnerAdapter) DoubleTap(ctx context.Context, x, y int) error

DoubleTap implements tools.UIRunner

func (*RunnerAdapter) Home

func (r *RunnerAdapter) Home(ctx context.Context) error

Home implements tools.UIRunner

func (*RunnerAdapter) LastOutput

func (r *RunnerAdapter) LastOutput() map[string]interface{}

LastOutput returns the cached output from the most recent RunStep execution.

func (*RunnerAdapter) Launch

func (r *RunnerAdapter) Launch(ctx context.Context, packageName string) error

Launch implements tools.UIRunner

func (*RunnerAdapter) LongPress

func (r *RunnerAdapter) LongPress(ctx context.Context, x, y int, durationMs int) error

LongPress implements tools.UIRunner

func (*RunnerAdapter) SetDeviceID

func (r *RunnerAdapter) SetDeviceID(deviceID string)

SetDeviceID sets the device ID for the runner.

func (*RunnerAdapter) Swipe

func (r *RunnerAdapter) Swipe(ctx context.Context, startX, startY, endX, endY int) error

Swipe implements tools.UIRunner

func (*RunnerAdapter) Tap

func (r *RunnerAdapter) Tap(ctx context.Context, x, y int) error

Tap implements tools.UIRunner

func (*RunnerAdapter) Type

func (r *RunnerAdapter) Type(ctx context.Context, text string) error

Type implements tools.UIRunner

func (*RunnerAdapter) Wait

func (r *RunnerAdapter) Wait(ctx context.Context, durationMs int) error

Wait implements tools.UIRunner

type StepExecution

// StepExecution records a single step execution.
type StepExecution struct {
	Index     int
	Action    string
	// Result is the uiagent.StepExecResult associated with this step.
	Result    uiagent.StepExecResult
	Timestamp time.Time
}

StepExecution records a single step execution.

type StepResult

// StepResult represents the result of a single agent step.
// NOTE(review): presumably Success reports whether the step succeeded and
// Finished whether the overall task is complete — confirm against the agent.
type StepResult struct {
	Success  bool
	Finished bool
	Thinking string
	Message  string
	// Action is the parsed action for this step, as a *ParsedAction.
	Action   *ParsedAction
	// Log is the run's execution log, as a *RunLog.
	Log      *RunLog
	// NOTE(review): Err sits alongside the error value that Run/Step also
	// return; confirm which one callers are expected to check.
	Err      error
}

StepResult represents the result of a single agent step.

type StepRunner

// StepRunner is the interface for runners that execute ActionSteps.
type StepRunner interface {
	// RunStep executes one uiagent.ActionStep and returns its
	// uiagent.StepExecResult.
	RunStep(ctx context.Context, step uiagent.ActionStep) (uiagent.StepExecResult, error)
}

StepRunner is the interface for runners that execute ActionSteps.

type UIAgent

// UIAgent defines the interface for a UI automation agent.
// The per-method notes below follow the documentation of LangChainAgent,
// which is documented as implementing this interface.
type UIAgent interface {
	// Run executes the full task (LangChainAgent runs until completion or
	// max steps, using task decomposition).
	Run(ctx context.Context, task string) (*StepResult, error)
	// Step executes a single agent step.
	Step(ctx context.Context, task string) (*StepResult, error)
	// PlanTask breaks a task down into steps.
	PlanTask(ctx context.Context, task string) (*PlanTaskResult, error)
	// PlanStepActions converts a single step into executable UI actions.
	PlanStepActions(ctx context.Context, step string) (*PlanStepActionsResult, error)
	// Reset clears the agent state.
	Reset()
	// GetLog returns the execution log.
	GetLog() *RunLog
	// SetTaskID sets the current task ID for interaction records.
	SetTaskID(taskID string)
	// GetPlanSteps returns the planned task steps.
	GetPlanSteps() []string
	// SetPlanSteps sets the planned task steps.
	SetPlanSteps(steps []string)
}

UIAgent defines the interface for a UI automation agent.

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL