llm

package

v0.0.0-...-b31a014 Latest Latest Go to latest Published: Jan 19, 2026 License: Apache-2.0 Imports: 25 Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/lingcetech/visiobotclient

Links

Open Source Insights

Documentation ¶

Index ¶

Variables
func FormatSystemPrompt(cfg PromptConfig) (string, error)
func GetAppName(packageName string) string
func GetPackageName(appName string) string
func GetSystemPromptTemplate(lang PromptLanguage) prompts.ChatPromptTemplate
func GetUserPromptTemplate() prompts.ChatPromptTemplate
func NewLoggingModel(inner llms.Model, meta LoggingMeta) llms.Model
type AgentAdapter
- func NewAgentAdapter(cfg AgentConfig, runner tools.UIRunner, platform tools.Platform) (*AgentAdapter, error)
- func (a *AgentAdapter) ExecuteCleanupSteps(ctx context.Context)
- func (a *AgentAdapter) GetInternalAgent() *LangChainAgent
- func (a *AgentAdapter) GetLog() *RunLog
- func (a *AgentAdapter) GetPlanSteps() []string
- func (a *AgentAdapter) PlanStepActions(ctx context.Context, step string) (*PlanStepActionsResult, error)
- func (a *AgentAdapter) PlanTask(ctx context.Context, task string) (*PlanTaskResult, error)
- func (a *AgentAdapter) Reset()
- func (a *AgentAdapter) Run(ctx context.Context, task string) (*uiagent.StepResult, error)
- func (a *AgentAdapter) SetPlanSteps(steps []string)
- func (a *AgentAdapter) SetTaskID(taskID string)
- func (a *AgentAdapter) Step(ctx context.Context, task string) (*uiagent.StepResult, error)
type AgentConfig
type Client
- func NewClient(cfg Config) (*Client, error)
- func (c *Client) Call(ctx context.Context, prompt string, options ...llms.CallOption) (string, error)
- func (c *Client) GenerateContent(ctx context.Context, messages []llms.MessageContent, ...) (*llms.ContentResponse, error)
- func (c *Client) GetConfig() Config
- func (c *Client) GetLLM() llms.Model
type Config
type ConversationMemory
- func NewConversationMemory(cfg MemoryConfig) *ConversationMemory
- func (m *ConversationMemory) Clear(ctx context.Context) error
- func (m *ConversationMemory) GetBuffer() *memory.ConversationBuffer
- func (m *ConversationMemory) GetChatHistory(ctx context.Context) (string, error)
- func (m *ConversationMemory) LoadMemoryVariables(ctx context.Context) (map[string]interface{}, error)
- func (m *ConversationMemory) SaveContext(ctx context.Context, humanInput, aiOutput string) error
type LLMInteractionRecord
type LangChainAgent
- func NewLangChainAgent(cfg AgentConfig, runner tools.UIRunner, platform tools.Platform) (*LangChainAgent, error)
- func (a *LangChainAgent) ExecuteCleanupSteps(ctx context.Context)
- func (a *LangChainAgent) GetLog() *RunLog
- func (a *LangChainAgent) GetPlanSteps() []string
- func (a *LangChainAgent) PlanStepActions(ctx context.Context, step string) (*PlanStepActionsResult, error)
- func (a *LangChainAgent) PlanTask(ctx context.Context, task string) (*PlanTaskResult, error)
- func (a *LangChainAgent) Reset()
- func (a *LangChainAgent) Run(ctx context.Context, task string) (*StepResult, error)
- func (a *LangChainAgent) SetPlanSteps(steps []string)
- func (a *LangChainAgent) SetTaskID(taskID string)
- func (a *LangChainAgent) Step(ctx context.Context, task string) (*StepResult, error)
- func (a *LangChainAgent) StepWithPlan(ctx context.Context, step string, stepIndex int) (*StepResult, error)
type LoggingMeta
type MemoryConfig
- func DefaultMemoryConfig() MemoryConfig
type MemoryType
type OperationRecord
type ParsedAction
type PlanStepActionsResult
type PlanTaskResult
type PlatformAdapter
- func NewPlatformAdapter(p PlatformCapturer) *PlatformAdapter
- func (p *PlatformAdapter) CaptureState(ctx context.Context) (*tools.ScreenState, error)
- func (p *PlatformAdapter) DeviceID() string
- func (p *PlatformAdapter) GetInstalledApps(ctx context.Context) (map[string]string, error)
- func (p *PlatformAdapter) SetDeviceID(deviceID string)
type PlatformCapturer
type PlatformWithApps
type PromptConfig
type PromptLanguage
type RunLog
type RunnerAdapter
- func NewRunnerAdapter(r StepRunner) *RunnerAdapter
- func (r *RunnerAdapter) Back(ctx context.Context) error
- func (r *RunnerAdapter) ClearLastOutput()
- func (r *RunnerAdapter) DoubleTap(ctx context.Context, x, y int) error
- func (r *RunnerAdapter) Home(ctx context.Context) error
- func (r *RunnerAdapter) LastOutput() map[string]interface{}
- func (r *RunnerAdapter) Launch(ctx context.Context, packageName string) error
- func (r *RunnerAdapter) LongPress(ctx context.Context, x, y int, durationMs int) error
- func (r *RunnerAdapter) SetDeviceID(deviceID string)
- func (r *RunnerAdapter) Swipe(ctx context.Context, startX, startY, endX, endY int) error
- func (r *RunnerAdapter) Tap(ctx context.Context, x, y int) error
- func (r *RunnerAdapter) Type(ctx context.Context, text string) error
- func (r *RunnerAdapter) Wait(ctx context.Context, durationMs int) error
type StepExecution
type StepResult
type StepRunner
type UIAgent

Constants ¶

This section is empty.

Variables ¶

View Source

// AppPackages maps app display names to Android package names.
//
// Most apps are registered under several aliases (a Chinese name and a
// lowercase Latin name), so multiple keys resolve to the same package.
// Keys are plain map keys and therefore case-sensitive; note that both "qq"
// and "QQ" are listed explicitly.
var AppPackages = map[string]string{

	// Social, messaging and short-video apps.
	"微信":          "com.tencent.mm",
	"wechat":      "com.tencent.mm",
	"qq":          "com.tencent.mobileqq",
	"QQ":          "com.tencent.mobileqq",
	"微博":          "com.sina.weibo",
	"weibo":       "com.sina.weibo",
	"抖音":          "com.ss.android.ugc.aweme",
	"douyin":      "com.ss.android.ugc.aweme",
	"快手":          "com.smile.gifmaker",
	"kuaishou":    "com.smile.gifmaker",
	"小红书":         "com.xingin.xhs",
	"xiaohongshu": "com.xingin.xhs",
	"bilibili":    "tv.danmaku.bili",
	"B站":          "tv.danmaku.bili",
	"telegram":    "org.telegram.messenger",
	"whatsapp":    "com.whatsapp",
	"facebook":    "com.facebook.katana",
	"instagram":   "com.instagram.android",
	"twitter":     "com.twitter.android",
	// "x" is an alias for the same package as "twitter".
	"x":           "com.twitter.android",

	// Shopping apps.
	"淘宝":        "com.taobao.taobao",
	"taobao":    "com.taobao.taobao",
	"京东":        "com.jingdong.app.mall",
	"jd":        "com.jingdong.app.mall",
	"拼多多":       "com.xunmeng.pinduoduo",
	"pinduoduo": "com.xunmeng.pinduoduo",
	"amazon":    "com.amazon.mShop.android.shopping",
	"ebay":      "com.ebay.mobile",

	// Food delivery and local-services apps.
	"美团":       "com.sankuai.meituan",
	"meituan":  "com.sankuai.meituan",
	"饿了么":      "me.ele",
	"eleme":    "me.ele",
	"大众点评":     "com.dianping.v1",
	"dianping": "com.dianping.v1",

	// Map and navigation apps.
	"高德地图":       "com.autonavi.minimap",
	"gaode":      "com.autonavi.minimap",
	"百度地图":       "com.baidu.BaiduMap",
	"baidumap":   "com.baidu.BaiduMap",
	"googlemaps": "com.google.android.apps.maps",
	"maps":       "com.google.android.apps.maps",

	// Mail, browser and system settings. The generic Chinese names for
	// "mailbox" and "browser" are bound to Gmail and Chrome respectively.
	"gmail":    "com.google.android.gm",
	"邮箱":       "com.google.android.gm",
	"chrome":   "com.android.chrome",
	"浏览器":      "com.android.chrome",
	"设置":       "com.android.settings",
	"settings": "com.android.settings",

	// Home screen / launcher aliases.
	// NOTE(review): com.miui.home looks vendor-specific (MIUI launcher);
	// confirm what should happen on devices that use a different launcher.
	"home":     "com.miui.home",
	"桌面":       "com.miui.home",
	"launcher": "com.miui.home",
}

AppPackages maps app display names (Chinese and English aliases) to Android package names.

Functions ¶

func FormatSystemPrompt ¶

func FormatSystemPrompt(cfg PromptConfig) (string, error)

FormatSystemPrompt formats the system prompt with the given configuration.

func GetAppName ¶

func GetAppName(packageName string) string

GetAppName returns the display name for a package name

func GetPackageName ¶

func GetPackageName(appName string) string

GetPackageName returns the Android package name for an app display name

func GetSystemPromptTemplate ¶

func GetSystemPromptTemplate(lang PromptLanguage) prompts.ChatPromptTemplate

GetSystemPromptTemplate returns a langchaingo ChatPromptTemplate for the system prompt.

func GetUserPromptTemplate ¶

func GetUserPromptTemplate() prompts.ChatPromptTemplate

GetUserPromptTemplate returns a langchaingo ChatPromptTemplate for user messages.

func NewLoggingModel ¶

func NewLoggingModel(inner llms.Model, meta LoggingMeta) llms.Model

Types ¶

type AgentAdapter ¶

type AgentAdapter struct {
	// contains filtered or unexported fields
}

AgentAdapter adapts LangChainAgent to the existing uiagent interface. This allows the new langchaingo-based agent to be used with the existing service layer.

func NewAgentAdapter ¶

func NewAgentAdapter(cfg AgentConfig, runner tools.UIRunner, platform tools.Platform) (*AgentAdapter, error)

NewAgentAdapter creates an adapter wrapping a LangChainAgent.

func (*AgentAdapter) ExecuteCleanupSteps ¶

func (a *AgentAdapter) ExecuteCleanupSteps(ctx context.Context)

ExecuteCleanupSteps delegates to the underlying agent

func (*AgentAdapter) GetInternalAgent ¶

func (a *AgentAdapter) GetInternalAgent() *LangChainAgent

GetInternalAgent returns the underlying LangChainAgent.

func (*AgentAdapter) GetLog ¶

func (a *AgentAdapter) GetLog() *RunLog

GetLog delegates to the underlying agent

func (*AgentAdapter) GetPlanSteps ¶

func (a *AgentAdapter) GetPlanSteps() []string

GetPlanSteps delegates to the underlying agent

func (*AgentAdapter) PlanStepActions ¶

func (a *AgentAdapter) PlanStepActions(ctx context.Context, step string) (*PlanStepActionsResult, error)

PlanStepActions delegates to the underlying agent

func (*AgentAdapter) PlanTask ¶

func (a *AgentAdapter) PlanTask(ctx context.Context, task string) (*PlanTaskResult, error)

PlanTask delegates to the underlying agent

func (*AgentAdapter) Reset ¶

func (a *AgentAdapter) Reset()

Reset delegates to the underlying agent

func (*AgentAdapter) Run ¶

func (a *AgentAdapter) Run(ctx context.Context, task string) (*uiagent.StepResult, error)

Run executes the full task and returns a result compatible with the existing interface.

func (*AgentAdapter) SetPlanSteps ¶

func (a *AgentAdapter) SetPlanSteps(steps []string)

SetPlanSteps delegates to the underlying agent

func (*AgentAdapter) SetTaskID ¶

func (a *AgentAdapter) SetTaskID(taskID string)

SetTaskID delegates to the underlying agent

func (*AgentAdapter) Step ¶

func (a *AgentAdapter) Step(ctx context.Context, task string) (*uiagent.StepResult, error)

Step executes a single step and returns a result compatible with the existing interface.

type AgentConfig ¶

// AgentConfig holds configuration for the LangChain Agent. It is the
// configuration argument of both NewLangChainAgent and NewAgentAdapter.
type AgentConfig struct {
	// LLM settings. BaseURL, APIKey and Model carry the same names as the
	// fields of Config (presumably forwarded to the LLM client — confirm
	// against the constructor).
	BaseURL         string
	APIKey          string
	Model           string
	ReasoningEffort string // Reasoning effort level

	// Agent settings.
	// DeviceID presumably identifies the target device — confirm.
	// MaxSteps presumably bounds the number of steps Run performs — confirm;
	// the zero-value behavior is not visible here.
	// Stream presumably enables token streaming through OnToken — confirm.
	DeviceID        string
	Lang            string // "zh" or "en"
	MaxSteps        int
	Stream          bool
	SystemPrompt    string // Custom system prompt (optional)
	UseTaskPlanning bool   // Whether to use task decomposition planning

	// Callbacks for streaming. OnToken receives a token string; when it is
	// invoked, and whether it may be nil, is not visible here — TODO confirm.
	OnToken func(token string)
}

AgentConfig holds configuration for the LangChain Agent.

type Client ¶

type Client struct {
	// contains filtered or unexported fields
}

Client wraps a langchaingo LLM model with additional functionality.

func NewClient ¶

func NewClient(cfg Config) (*Client, error)

NewClient creates a new LLM client using langchaingo's OpenAI implementation. It supports custom BaseURL for compatibility with various OpenAI-compatible APIs.

func (*Client) Call ¶

func (c *Client) Call(ctx context.Context, prompt string, options ...llms.CallOption) (string, error)

Call is a convenience method for calling the LLM with a simple prompt.

func (*Client) GenerateContent ¶

func (c *Client) GenerateContent(ctx context.Context, messages []llms.MessageContent, options ...llms.CallOption) (*llms.ContentResponse, error)

GenerateContent is a convenience method for generating content from the LLM.

func (*Client) GetConfig ¶

func (c *Client) GetConfig() Config

GetConfig returns the client configuration.

func (*Client) GetLLM ¶

func (c *Client) GetLLM() llms.Model

GetLLM returns the underlying langchaingo LLM model.

type Config ¶

// Config holds the configuration for creating an LLM client. It is the
// argument of NewClient and the value returned by (*Client).GetConfig.
type Config struct {
	BaseURL string // Custom API base URL (e.g., for proxies or alternative providers)
	APIKey  string // API key for authentication
	Model   string // Model name (e.g., "gpt-4-vision-preview")
}

Config holds the configuration for creating an LLM client.

type ConversationMemory ¶

type ConversationMemory struct {
	// contains filtered or unexported fields
}

ConversationMemory wraps langchaingo's memory for conversation management.

func NewConversationMemory ¶

func NewConversationMemory(cfg MemoryConfig) *ConversationMemory

NewConversationMemory creates a new conversation memory instance.

func (*ConversationMemory) Clear ¶

func (m *ConversationMemory) Clear(ctx context.Context) error

Clear clears all conversation history.

func (*ConversationMemory) GetBuffer ¶

func (m *ConversationMemory) GetBuffer() *memory.ConversationBuffer

GetBuffer returns the underlying conversation buffer.

func (*ConversationMemory) GetChatHistory ¶

func (m *ConversationMemory) GetChatHistory(ctx context.Context) (string, error)

GetChatHistory returns the chat history as a formatted string.

func (*ConversationMemory) LoadMemoryVariables ¶

func (m *ConversationMemory) LoadMemoryVariables(ctx context.Context) (map[string]interface{}, error)

LoadMemoryVariables loads the conversation history.

func (*ConversationMemory) SaveContext ¶

func (m *ConversationMemory) SaveContext(ctx context.Context, humanInput, aiOutput string) error

SaveContext saves a human-AI interaction to memory.

type LLMInteractionRecord ¶

// LLMInteractionRecord records one LLM interaction for database storage.
// PlanTaskResult and PlanStepActionsResult each carry a pointer to one.
//
// RequestData and ResponseData are free-form JSON objects; their key schema
// is not visible here — TODO document it alongside the code that fills them.
// LatencyMs is the latency of the interaction; the field name and JSON key
// indicate milliseconds. Error is omitted from the JSON encoding when empty.
type LLMInteractionRecord struct {
	TaskID       string                 `json:"task_id"`
	StepIndex    int                    `json:"step_index"` // -1 for PlanTask, >=0 for step actions
	RequestData  map[string]interface{} `json:"request_data"`
	ResponseData map[string]interface{} `json:"response_data"`
	LatencyMs    int64                  `json:"latency_ms"`
	Success      bool                   `json:"success"`
	Error        string                 `json:"error,omitempty"`
}

LLMInteractionRecord records an LLM interaction for database storage.

type LangChainAgent ¶

type LangChainAgent struct {
	// contains filtered or unexported fields
}

LangChainAgent implements UIAgent using langchaingo.

func NewLangChainAgent ¶

func NewLangChainAgent(cfg AgentConfig, runner tools.UIRunner, platform tools.Platform) (*LangChainAgent, error)

NewLangChainAgent creates a new LangChain-based agent.

func (*LangChainAgent) ExecuteCleanupSteps ¶

func (a *LangChainAgent) ExecuteCleanupSteps(ctx context.Context)

ExecuteCleanupSteps performs cleanup actions when the task finishes: (1) exit the current app (recorded as a step for replay); (2) restore the ADB keyboard to the original IME. This should be called by the service layer after the entire task completes.

func (*LangChainAgent) GetLog ¶

func (a *LangChainAgent) GetLog() *RunLog

GetLog returns the execution log.

func (*LangChainAgent) GetPlanSteps ¶

func (a *LangChainAgent) GetPlanSteps() []string

GetPlanSteps returns the planned task steps

func (*LangChainAgent) PlanStepActions ¶

func (a *LangChainAgent) PlanStepActions(ctx context.Context, step string) (*PlanStepActionsResult, error)

PlanStepActions converts a single step into executable UI actions

func (*LangChainAgent) PlanTask ¶

func (a *LangChainAgent) PlanTask(ctx context.Context, task string) (*PlanTaskResult, error)

PlanTask breaks down a task into steps using LLM

func (*LangChainAgent) Reset ¶

func (a *LangChainAgent) Reset()

Reset clears the agent state.

func (*LangChainAgent) Run ¶

func (a *LangChainAgent) Run(ctx context.Context, task string) (*StepResult, error)

Run executes the full task until completion or max steps using task decomposition.

func (*LangChainAgent) SetPlanSteps ¶

func (a *LangChainAgent) SetPlanSteps(steps []string)

SetPlanSteps sets the planned task steps

func (*LangChainAgent) SetTaskID ¶

func (a *LangChainAgent) SetTaskID(taskID string)

SetTaskID sets the current task ID for interaction records

func (*LangChainAgent) Step ¶

func (a *LangChainAgent) Step(ctx context.Context, task string) (*StepResult, error)

Step executes a single agent step by directly calling the LLM.

func (*LangChainAgent) StepWithPlan ¶

func (a *LangChainAgent) StepWithPlan(ctx context.Context, step string, stepIndex int) (*StepResult, error)

StepWithPlan executes a single planned step with enhanced context by directly calling the LLM.

type LoggingMeta ¶

// LoggingMeta is the metadata argument of NewLoggingModel.
// NOTE(review): presumably BaseURL and Model are attached to logged LLM calls
// to identify the backend — confirm against the NewLoggingModel implementation.
type LoggingMeta struct {
	BaseURL string
	Model   string
}

type MemoryConfig ¶

// MemoryConfig holds the configuration for conversation memory. It is the
// argument of NewConversationMemory; DefaultMemoryConfig returns a default value.
type MemoryConfig struct {
	Type       MemoryType // Type of memory to use
	MaxHistory int        // Maximum number of messages to keep (for window type)
}

MemoryConfig holds the configuration for conversation memory.

func DefaultMemoryConfig ¶

func DefaultMemoryConfig() MemoryConfig

DefaultMemoryConfig returns the default memory configuration.

type MemoryType ¶

// MemoryType represents the type of conversation memory. It is a string type
// and is used as the Type field of MemoryConfig.
type MemoryType string

MemoryType represents the type of conversation memory.

// Declared MemoryType values.
const (
	MemoryTypeBuffer  MemoryType = "buffer"  // Full conversation buffer
	MemoryTypeWindow  MemoryType = "window"  // Sliding window of recent messages
	MemoryTypeSummary MemoryType = "summary" // Summarized conversation history
)

type OperationRecord ¶

// OperationRecord records a single operation in history.
//
// Thinking, Action and Result are free-form strings; presumably the model's
// reasoning text, the action taken and its outcome — confirm against the code
// that appends records.
type OperationRecord struct {
	StepIndex int
	Thinking  string
	Action    string
	Result    string
	Timestamp time.Time
}

OperationRecord records a single operation in history.

type ParsedAction ¶

// ParsedAction represents a parsed action from the agent. StepResult.Action
// points to one.
//
// NOTE(review): presumably Name is the action identifier, Args its named
// arguments and Raw the unparsed action text — confirm against the parser.
type ParsedAction struct {
	Name string
	Args map[string]interface{}
	Raw  string
}

ParsedAction represents a parsed action from the agent.

type PlanStepActionsResult ¶

// PlanStepActionsResult contains the result of step action planning, as
// returned by PlanStepActions. Interaction carries the `json:"-"` tag, so it
// is excluded from the JSON encoding of this struct.
type PlanStepActionsResult struct {
	Actions     []string              `json:"actions"` // Planned actions for the step
	Interaction *LLMInteractionRecord `json:"-"`       // LLM interaction record for database storage
}

PlanStepActionsResult contains the result of step action planning

type PlanTaskResult ¶

// PlanTaskResult contains the result of task planning, as returned by
// PlanTask. Interaction carries the `json:"-"` tag, so it is excluded from
// the JSON encoding of this struct.
type PlanTaskResult struct {
	Steps       []string              `json:"steps"` // Planned task steps
	Interaction *LLMInteractionRecord `json:"-"`     // LLM interaction record for database storage
}

PlanTaskResult contains the result of task planning

type PlatformAdapter ¶

type PlatformAdapter struct {
	// contains filtered or unexported fields
}

PlatformAdapter adapts an existing platform to the tools.Platform interface.

func NewPlatformAdapter ¶

func NewPlatformAdapter(p PlatformCapturer) *PlatformAdapter

NewPlatformAdapter creates a platform adapter.

func (*PlatformAdapter) CaptureState ¶

func (p *PlatformAdapter) CaptureState(ctx context.Context) (*tools.ScreenState, error)

CaptureState captures the current screen state.

func (*PlatformAdapter) DeviceID ¶

func (p *PlatformAdapter) DeviceID() string

DeviceID returns the device identifier.

func (*PlatformAdapter) GetInstalledApps ¶

func (p *PlatformAdapter) GetInstalledApps(ctx context.Context) (map[string]string, error)

GetInstalledApps returns a map of app name to package name.

func (*PlatformAdapter) SetDeviceID ¶

func (p *PlatformAdapter) SetDeviceID(deviceID string)

SetDeviceID sets the device ID for the platform.

type PlatformCapturer ¶

// PlatformCapturer is the interface for platforms that can capture screen
// state. NewPlatformAdapter accepts a value of this type.
type PlatformCapturer interface {
	// CaptureState returns the current screen as a uiagent.ScreenInfo value.
	CaptureState(ctx context.Context) (uiagent.ScreenInfo, error)
}

PlatformCapturer is the interface for platforms that can capture screen state.

type PlatformWithApps ¶

// PlatformWithApps is an optional interface for platforms that can list
// installed apps.
type PlatformWithApps interface {
	// GetInstalledApps has the same signature as
	// (*PlatformAdapter).GetInstalledApps, which is documented as returning a
	// map of app name to package name.
	GetInstalledApps(ctx context.Context) (map[string]string, error)
}

PlatformWithApps is an optional interface for platforms that can list installed apps.

type PromptConfig ¶

// PromptConfig holds the configuration for prompt generation. It is the
// argument of FormatSystemPrompt.
//
// NOTE(review): OSVersion and DeviceModel are presumably interpolated into
// the system prompt — confirm against the prompt template.
type PromptConfig struct {
	Language    PromptLanguage
	OSVersion   string
	DeviceModel string
}

PromptConfig holds the configuration for prompt generation.

type PromptLanguage ¶

// PromptLanguage represents the language for prompts. It is a string type
// accepted by GetSystemPromptTemplate and used as PromptConfig.Language.
type PromptLanguage string

PromptLanguage represents the language for prompts.

// Declared PromptLanguage values. The underlying strings are the same "zh"
// and "en" codes that AgentConfig.Lang is documented to take.
const (
	PromptLangChinese PromptLanguage = "zh"
	PromptLangEnglish PromptLanguage = "en"
)

type RunLog ¶

// RunLog records the execution history. It is returned by GetLog and
// referenced from StepResult.Log.
//
// Steps holds one StepExecution per recorded step. The contents of Variables
// are not visible here — TODO document which keys are stored.
type RunLog struct {
	Task      string
	DeviceID  string
	Steps     []StepExecution
	Variables map[string]interface{}
}

RunLog records the execution history.

type RunnerAdapter ¶

type RunnerAdapter struct {
	// contains filtered or unexported fields
}

RunnerAdapter adapts an existing UIRunner (RunStep-based) to the tools.UIRunner interface.

func NewRunnerAdapter ¶

func NewRunnerAdapter(r StepRunner) *RunnerAdapter

NewRunnerAdapter creates a runner adapter.

func (*RunnerAdapter) Back ¶

func (r *RunnerAdapter) Back(ctx context.Context) error

Back implements tools.UIRunner

func (*RunnerAdapter) ClearLastOutput ¶

func (r *RunnerAdapter) ClearLastOutput()

ClearLastOutput clears the cached output from the previous action.

func (*RunnerAdapter) DoubleTap ¶

func (r *RunnerAdapter) DoubleTap(ctx context.Context, x, y int) error

DoubleTap implements tools.UIRunner

func (*RunnerAdapter) Home ¶

func (r *RunnerAdapter) Home(ctx context.Context) error

Home implements tools.UIRunner

func (*RunnerAdapter) LastOutput ¶

func (r *RunnerAdapter) LastOutput() map[string]interface{}

LastOutput returns the cached output from the most recent RunStep execution.

func (*RunnerAdapter) Launch ¶

func (r *RunnerAdapter) Launch(ctx context.Context, packageName string) error

Launch implements tools.UIRunner

func (*RunnerAdapter) LongPress ¶

func (r *RunnerAdapter) LongPress(ctx context.Context, x, y int, durationMs int) error

LongPress implements tools.UIRunner

func (*RunnerAdapter) SetDeviceID ¶

func (r *RunnerAdapter) SetDeviceID(deviceID string)

SetDeviceID sets the device ID for the runner.

func (*RunnerAdapter) Swipe ¶

func (r *RunnerAdapter) Swipe(ctx context.Context, startX, startY, endX, endY int) error

Swipe implements tools.UIRunner

func (*RunnerAdapter) Tap ¶

func (r *RunnerAdapter) Tap(ctx context.Context, x, y int) error

Tap implements tools.UIRunner

func (*RunnerAdapter) Type ¶

func (r *RunnerAdapter) Type(ctx context.Context, text string) error

Type implements tools.UIRunner

func (*RunnerAdapter) Wait ¶

func (r *RunnerAdapter) Wait(ctx context.Context, durationMs int) error

Wait implements tools.UIRunner

type StepExecution ¶

// StepExecution records a single step execution; RunLog.Steps is a slice of
// these.
//
// Result has type uiagent.StepExecResult, the same type StepRunner.RunStep
// returns. NOTE(review): whether Index is zero-based is not visible here.
type StepExecution struct {
	Index     int
	Action    string
	Result    uiagent.StepExecResult
	Timestamp time.Time
}

StepExecution records a single step execution.

type StepResult ¶

// StepResult represents the result of a single agent step. It is returned by
// the Run, Step and StepWithPlan methods of LangChainAgent and by the UIAgent
// interface.
//
// NOTE(review): the methods that return *StepResult also return an error;
// how Err relates to that returned error is not visible here — confirm
// before relying on either one alone. Presumably Finished reports that the
// task is complete, as opposed to Success for the individual step — confirm.
type StepResult struct {
	Success  bool
	Finished bool
	Thinking string
	Message  string
	Action   *ParsedAction
	Log      *RunLog
	Err      error
}

StepResult represents the result of a single agent step.

type StepRunner ¶

// StepRunner is the interface for runners that execute ActionSteps.
// NewRunnerAdapter accepts a value of this type.
type StepRunner interface {
	// RunStep executes one uiagent.ActionStep and returns its
	// uiagent.StepExecResult.
	RunStep(ctx context.Context, step uiagent.ActionStep) (uiagent.StepExecResult, error)
}

StepRunner is the interface for runners that execute ActionSteps.

type UIAgent ¶

// UIAgent defines the interface for a UI automation agent. LangChainAgent is
// documented as implementing it; the per-method notes below restate how
// LangChainAgent documents each method.
type UIAgent interface {
	// Run executes the full task until completion or max steps.
	Run(ctx context.Context, task string) (*StepResult, error)
	// Step executes a single agent step.
	Step(ctx context.Context, task string) (*StepResult, error)
	// PlanTask breaks a task down into steps using the LLM.
	PlanTask(ctx context.Context, task string) (*PlanTaskResult, error)
	// PlanStepActions converts a single step into executable UI actions.
	PlanStepActions(ctx context.Context, step string) (*PlanStepActionsResult, error)
	// Reset clears the agent state.
	Reset()
	// GetLog returns the execution log.
	GetLog() *RunLog
	// SetTaskID sets the current task ID for interaction records.
	SetTaskID(taskID string)
	// GetPlanSteps returns the planned task steps.
	GetPlanSteps() []string
	// SetPlanSteps sets the planned task steps.
	SetPlanSteps(steps []string)
}

UIAgent defines the interface for a UI automation agent.

Source Files ¶

View all Source files

Directories ¶

Path	Synopsis
tools

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL