agent

package
v0.0.1 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 10, 2026 License: MIT Imports: 37 Imported by: 0

Documentation

Index

Constants

View Source
const (
	UserStateSpeaking  UserState = "speaking"
	UserStateListening UserState = "listening"
	UserStateAway      UserState = "away"

	AgentStateInitializing AgentState = "initializing"
	AgentStateIdle         AgentState = "idle"
	AgentStateListening    AgentState = "listening"
	AgentStateThinking     AgentState = "thinking"
	AgentStateSpeaking     AgentState = "speaking"
)
View Source
const (
	SpeechPriorityLow    = 0
	SpeechPriorityNormal = 5
	SpeechPriorityHigh   = 10
	InterruptionTimeout  = 5 * time.Second
)

Variables

This section is empty.

Functions

func PerformToolExecutions

func PerformToolExecutions(
	ctx context.Context,
	functionCh <-chan *llm.FunctionToolCall,
	toolCtx *llm.ToolContext,
) <-chan ToolExecutionOutput

func RegisterPlugin

func RegisterPlugin(p Plugin)

func UploadSessionReport

func UploadSessionReport(
	cloudURL string,
	apiKey string,
	apiSecret string,
	agentName string,
	report *SessionReport,
) error

func WithRunContext

func WithRunContext(ctx context.Context, rc *RunContext) context.Context

Types

type Agent

type Agent struct {
	ID           string
	Instructions string
	ChatCtx      *llm.ChatContext
	Tools        []llm.Tool

	TurnDetection TurnDetectionMode
	TurnDetector  TurnDetector
	STT           stt.STT
	VAD           vad.VAD
	LLM           llm.LLM
	TTS           tts.TTS

	AllowInterruptions        bool
	MinConsecutiveSpeechDelay float64
	UseTTSAlignedTranscript   bool
	MinEndpointingDelay       float64
	MaxEndpointingDelay       float64
	// contains filtered or unexported fields
}

func NewAgent

func NewAgent(instructions string) *Agent

func (*Agent) GetActivity

func (a *Agent) GetActivity() *AgentActivity

func (*Agent) GetAgent

func (a *Agent) GetAgent() *Agent

func (*Agent) OnEnter

func (a *Agent) OnEnter()

func (*Agent) OnExit

func (a *Agent) OnExit()

func (*Agent) OnUserTurnCompleted

func (a *Agent) OnUserTurnCompleted(ctx context.Context, chatCtx *llm.ChatContext, newMsg *llm.ChatMessage) error

func (*Agent) Start

func (a *Agent) Start(session *AgentSession, agentIntf AgentInterface) *AgentActivity

func (*Agent) UpdateInstructions

func (a *Agent) UpdateInstructions(ctx context.Context, instructions string) error

func (*Agent) UpdateTools

func (a *Agent) UpdateTools(ctx context.Context, tools []llm.Tool) error

type AgentActivity

type AgentActivity struct {
	AgentIntf AgentInterface
	Agent     *Agent
	Session   *AgentSession
	// contains filtered or unexported fields
}

AgentActivity handles the internal event loops, I/O processing, and speech generation queue for an Agent.

func NewAgentActivity

func NewAgentActivity(agentIntf AgentInterface, session *AgentSession) *AgentActivity

func (*AgentActivity) OnEndOfSpeech

func (a *AgentActivity) OnEndOfSpeech(ev *vad.VADEvent)

func (*AgentActivity) OnFinalTranscript

func (a *AgentActivity) OnFinalTranscript(ev *stt.SpeechEvent)

func (*AgentActivity) OnStartOfSpeech

func (a *AgentActivity) OnStartOfSpeech(ev *vad.VADEvent)

OnStartOfSpeech, OnEndOfSpeech, and OnFinalTranscript are the event callbacks declared by RecognitionHooks.

func (*AgentActivity) ScheduleSpeech

func (a *AgentActivity) ScheduleSpeech(speech *SpeechHandle, priority int, force bool) error

func (*AgentActivity) Start

func (a *AgentActivity) Start()

func (*AgentActivity) Stop

func (a *AgentActivity) Stop()

type AgentInterface

type AgentInterface interface {
	OnEnter()
	OnExit()
	OnUserTurnCompleted(ctx context.Context, chatCtx *llm.ChatContext, newMsg *llm.ChatMessage) error
	GetAgent() *Agent
	GetActivity() *AgentActivity
}

type AgentSession

type AgentSession struct {
	Options AgentSessionOptions

	ChatCtx   *llm.ChatContext
	Agent     AgentInterface
	STT       stt.STT
	VAD       vad.VAD
	LLM       llm.LLM
	TTS       tts.TTS
	Tools     []llm.Tool
	Assistant *PipelineAgent
	Room      *lksdk.Room

	MetricsCollector *telemetry.UsageCollector

	UserState  UserState
	AgentState AgentState

	// Transcript attribution — set by RoomIO when tracks are established.
	RemoteUserIdentity string
	RemoteTrackSID     string
	AgentTrackSID      string

	// Event channels
	AgentStateChangedCh chan AgentStateChangedEvent
	UserStateChangedCh  chan UserStateChangedEvent
	// contains filtered or unexported fields
}

func NewAgentSession

func NewAgentSession(agent AgentInterface, room *lksdk.Room, opts AgentSessionOptions) *AgentSession

func (*AgentSession) GenerateReply

func (s *AgentSession) GenerateReply(ctx context.Context, userInput string) error

func (*AgentSession) GetAgentTrackSID

func (s *AgentSession) GetAgentTrackSID() string

func (*AgentSession) OnAudioFrame

func (s *AgentSession) OnAudioFrame(ctx context.Context, frame *model.AudioFrame)

func (*AgentSession) PublishAgentTranscript

func (s *AgentSession) PublishAgentTranscript(text string)

PublishAgentTranscript publishes the agent's LLM response to the Playground. Sends both a ChatMessage (chat panel) and a Transcription packet (transcript overlay).

func (*AgentSession) PublishUserTranscript

func (s *AgentSession) PublishUserTranscript(text string)

PublishUserTranscript publishes the user's STT transcript to the Playground. Sends both a ChatMessage (chat panel) and a Transcription packet (transcript overlay).

func (*AgentSession) SetAgentTrackSID

func (s *AgentSession) SetAgentTrackSID(sid string)

func (*AgentSession) SetRemoteTrackSID

func (s *AgentSession) SetRemoteTrackSID(sid string)

func (*AgentSession) SetRemoteUserIdentity

func (s *AgentSession) SetRemoteUserIdentity(identity string)

func (*AgentSession) Start

func (s *AgentSession) Start(ctx context.Context) error

func (*AgentSession) Stop

func (s *AgentSession) Stop(ctx context.Context) error

func (*AgentSession) UpdateAgentState

func (s *AgentSession) UpdateAgentState(state AgentState)

func (*AgentSession) UpdateUserState

func (s *AgentSession) UpdateUserState(state UserState)

type AgentSessionOptions

type AgentSessionOptions struct {
	AllowInterruptions            bool
	DiscardAudioIfUninterruptible bool
	MinInterruptionDuration       float64
	MinInterruptionWords          int
	MinEndpointingDelay           float64
	MaxEndpointingDelay           float64
	MaxToolSteps                  int
	UserAwayTimeout               float64
	FalseInterruptionTimeout      float64
	ResumeFalseInterruption       bool
	MinConsecutiveSpeechDelay     float64
	UseTTSAlignedTranscript       bool
	PreemptiveGeneration          bool
	AECWarmupDuration             float64
}

type AgentState

type AgentState string

type AgentStateChangedEvent

type AgentStateChangedEvent struct {
	OldState AgentState
	NewState AgentState
}

type AgentTask

type AgentTask[T any] struct {
	Agent
	Result chan T
	Err    chan error
}

AgentTask represents a sub-agent execution that returns a result.

func NewAgentTask

func NewAgentTask[T any](instructions string) *AgentTask[T]

func (*AgentTask[T]) Complete

func (t *AgentTask[T]) Complete(result T)

func (*AgentTask[T]) Fail

func (t *AgentTask[T]) Fail(err error)

func (*AgentTask[T]) WaitAny

func (t *AgentTask[T]) WaitAny(ctx context.Context) (any, error)

type AudioConfig

type AudioConfig struct {
	Source      AudioSource
	Volume      float64
	Probability float64
}

type AudioRecognition

type AudioRecognition struct {
	// contains filtered or unexported fields
}

func NewAudioRecognition

func NewAudioRecognition(session *AgentSession, hooks RecognitionHooks, s stt.STT, v vad.VAD) *AudioRecognition

func (*AudioRecognition) PushAudio

func (ar *AudioRecognition) PushAudio(frame *model.AudioFrame) error

func (*AudioRecognition) Start

func (ar *AudioRecognition) Start(ctx context.Context) error

type AudioSource

type AudioSource interface{} // Can be string, BuiltinAudioClip, or <-chan *model.AudioFrame

type Avatar

type Avatar struct {
	State AvatarState
}

func NewAvatar

func NewAvatar() *Avatar

func (*Avatar) Start

func (a *Avatar) Start(ctx context.Context) error

type AvatarIO

type AvatarIO interface {
	SendAvatarData(ctx context.Context, data []byte) error
}

AvatarIO defines how Avatar commands/data are sent.

type AvatarRunner

type AvatarRunner struct {
	// contains filtered or unexported fields
}

AvatarRunner coordinates Avatar IO and LipSync events.

func NewAvatarRunner

func NewAvatarRunner(io AvatarIO) *AvatarRunner

func (*AvatarRunner) SimulateLipSync

func (r *AvatarRunner) SimulateLipSync(text string)

SimulateLipSync takes text (from TranscriptSynchronizer) and simulates basic lip movements.

func (*AvatarRunner) Start

func (r *AvatarRunner) Start(ctx context.Context) error

func (*AvatarRunner) Stop

func (r *AvatarRunner) Stop()

type AvatarState

type AvatarState string
const (
	AvatarStateIdle     AvatarState = "idle"
	AvatarStateSpeaking AvatarState = "speaking"
)

type BackgroundAudioPlayer

type BackgroundAudioPlayer struct {
	// contains filtered or unexported fields
}

func NewBackgroundAudioPlayer

func NewBackgroundAudioPlayer(ambientSound, thinkingSound interface{}) *BackgroundAudioPlayer

func (*BackgroundAudioPlayer) AgentStateChanged

func (p *BackgroundAudioPlayer) AgentStateChanged(newState AgentState)

func (*BackgroundAudioPlayer) Close

func (p *BackgroundAudioPlayer) Close() error

func (*BackgroundAudioPlayer) Play

func (p *BackgroundAudioPlayer) Play(audio interface{}, loop bool) *PlayHandle

func (*BackgroundAudioPlayer) Start

func (p *BackgroundAudioPlayer) Start(room *lksdk.Room, agentSession *AgentSession) error

type BuiltinAudioClip

type BuiltinAudioClip string
const (
	CityAmbience    BuiltinAudioClip = "city-ambience.ogg"
	ForestAmbience  BuiltinAudioClip = "forest-ambience.ogg"
	OfficeAmbience  BuiltinAudioClip = "office-ambience.ogg"
	CrowdedRoom     BuiltinAudioClip = "crowded-room.ogg"
	KeyboardTyping  BuiltinAudioClip = "keyboard-typing.ogg"
	KeyboardTyping2 BuiltinAudioClip = "keyboard-typing2.ogg"
	HoldMusic       BuiltinAudioClip = "hold_music.ogg"
)

func (BuiltinAudioClip) Path

func (b BuiltinAudioClip) Path() string

type ClientEventPayload

type ClientEventPayload struct {
	Type  string `json:"type"`
	State string `json:"state,omitempty"`
}

type ClientEventsDispatcher

type ClientEventsDispatcher struct {
	// contains filtered or unexported fields
}

ClientEventsDispatcher manages sending Agent states to the LiveKit Room DataChannel

func NewClientEventsDispatcher

func NewClientEventsDispatcher(room *lksdk.Room) *ClientEventsDispatcher

func (*ClientEventsDispatcher) DispatchAgentState

func (d *ClientEventsDispatcher) DispatchAgentState(state AgentState)

DispatchAgentState emits AgentStateIdle, AgentStateThinking, and AgentStateSpeaking.

func (*ClientEventsDispatcher) DispatchUserState

func (d *ClientEventsDispatcher) DispatchUserState(state UserState)

DispatchUserState emits UserStateListening and UserStateSpeaking.

type CloseEvent

type CloseEvent struct {
	Reason    CloseReason
	Error     error
	CreatedAt time.Time
}

func (*CloseEvent) GetType

func (e *CloseEvent) GetType() string

type CloseReason

type CloseReason string
const (
	CloseReasonError                   CloseReason = "error"
	CloseReasonJobShutdown             CloseReason = "job_shutdown"
	CloseReasonParticipantDisconnected CloseReason = "participant_disconnected"
	CloseReasonUserInitiated           CloseReason = "user_initiated"
)

type ConversationItemAddedEvent

type ConversationItemAddedEvent struct {
	Item      llm.ChatItem
	CreatedAt time.Time
}

func (*ConversationItemAddedEvent) GetType

func (e *ConversationItemAddedEvent) GetType() string

type DataStreamIO

type DataStreamIO struct {
	// contains filtered or unexported fields
}

func NewDataStreamIO

func NewDataStreamIO(room *lksdk.Room) *DataStreamIO

func (*DataStreamIO) SendAvatarData

func (io *DataStreamIO) SendAvatarData(ctx context.Context, data []byte) error

type DtmfEvent

type DtmfEvent struct {
	Digit string
	Time  time.Time
}

type EndOfTurnInfo

type EndOfTurnInfo struct {
	SkipReply            bool
	NewTranscript        string
	TranscriptConfidence float64
	StartedSpeakingAt    *float64
	StoppedSpeakingAt    *float64
}

type EvaluationResult

type EvaluationResult struct {
	Judgments map[string]string
}

type Event

type Event interface {
	GetType() string
}

type IVRActivity

type IVRActivity struct {
	AgentIntf AgentInterface
	Agent     *Agent
	// contains filtered or unexported fields
}

func NewIVRActivity

func NewIVRActivity(agentIntf AgentInterface) *IVRActivity

func (*IVRActivity) OnDtmf

func (i *IVRActivity) OnDtmf(digit string)

func (*IVRActivity) SetDigitCallback

func (i *IVRActivity) SetDigitCallback(timeout time.Duration, cb func(buffer string) (bool, error))

func (*IVRActivity) Start

func (i *IVRActivity) Start()

func (*IVRActivity) Stop

func (i *IVRActivity) Stop()

type InputDetails

type InputDetails struct {
	Modality string
}

func DefaultInputDetails

func DefaultInputDetails() InputDetails

type LLMGenerationData

type LLMGenerationData struct {
	TextCh     chan string
	FunctionCh chan *llm.FunctionToolCall
	FullTextCh chan string // receives the complete assembled text when streaming is done
	Usage      *llm.CompletionUsage
}

func PerformLLMInference

func PerformLLMInference(ctx context.Context, l llm.LLM, chatCtx *llm.ChatContext, tools []llm.Tool) (*LLMGenerationData, error)

type LLMTurnDetector

type LLMTurnDetector struct {
	// contains filtered or unexported fields
}

LLMTurnDetector uses an LLM to predict if the user has finished speaking. It sends the recent conversation history to the LLM and asks for a probability score.

func NewLLMTurnDetector

func NewLLMTurnDetector(llmInstance llm.LLM) *LLMTurnDetector

func (*LLMTurnDetector) PredictEndOfTurn

func (m *LLMTurnDetector) PredictEndOfTurn(ctx context.Context, chatCtx *llm.ChatContext) (float64, error)

type MetricsCollectedEvent

type MetricsCollectedEvent struct {
	Metrics   telemetry.AgentMetrics
	CreatedAt time.Time
}

func (*MetricsCollectedEvent) GetType

func (e *MetricsCollectedEvent) GetType() string

type MultimodalAgent

type MultimodalAgent struct {
	PublishAudio func(frame *model.AudioFrame) error
	// contains filtered or unexported fields
}

func NewMultimodalAgent

func NewMultimodalAgent(
	m llm.RealtimeModel,
	chatCtx *llm.ChatContext,
) *MultimodalAgent

func (*MultimodalAgent) OnAudioFrame

func (ma *MultimodalAgent) OnAudioFrame(ctx context.Context, frame *model.AudioFrame)

func (*MultimodalAgent) Start

func (ma *MultimodalAgent) Start(ctx context.Context, s *AgentSession) error

type PipelineAgent

type PipelineAgent struct {
	LLM llm.LLM

	PublishAudio func(frame *model.AudioFrame) error
	// contains filtered or unexported fields
}

func NewPipelineAgent

func NewPipelineAgent(
	vad vad.VAD,
	stt stt.STT,
	llmObj llm.LLM,
	tts tts.TTS,
	chatCtx *llm.ChatContext,
) *PipelineAgent

func (*PipelineAgent) OnAudioFrame

func (va *PipelineAgent) OnAudioFrame(ctx context.Context, frame *model.AudioFrame)

func (*PipelineAgent) Start

func (va *PipelineAgent) Start(ctx context.Context, s *AgentSession) error

type PlayHandle

type PlayHandle struct {
	// contains filtered or unexported fields
}

func (*PlayHandle) Done

func (h *PlayHandle) Done() bool

func (*PlayHandle) Stop

func (h *PlayHandle) Stop()

func (*PlayHandle) WaitForPlayout

func (h *PlayHandle) WaitForPlayout()

type Plugin

type Plugin interface {
	Title() string
	Version() string
	Package() string
	DownloadFiles() error
}

func RegisteredPlugins

func RegisteredPlugins() []Plugin

type QueueIO

type QueueIO struct {
	// contains filtered or unexported fields
}

func NewQueueIO

func NewQueueIO() *QueueIO

func (*QueueIO) ReadQueue

func (io *QueueIO) ReadQueue() <-chan []byte

func (*QueueIO) SendAvatarData

func (io *QueueIO) SendAvatarData(ctx context.Context, data []byte) error

type RecognitionHooks

type RecognitionHooks interface {
	OnStartOfSpeech(ev *vad.VADEvent)
	OnEndOfSpeech(ev *vad.VADEvent)
	OnFinalTranscript(ev *stt.SpeechEvent)
}

type RecordingOptions

type RecordingOptions struct {
	Audio      bool `json:"audio"`
	Traces     bool `json:"traces"`
	Logs       bool `json:"logs"`
	Transcript bool `json:"transcript"`
}

type RunAssert

type RunAssert struct {
	ChatCtx *llm.ChatContext
	// contains filtered or unexported fields
}

func (*RunAssert) ContainsMessage

func (a *RunAssert) ContainsMessage(role llm.ChatRole, content string) *RunAssert

func (*RunAssert) HasError

func (a *RunAssert) HasError() error

func (*RunAssert) IsFunctionCall

func (a *RunAssert) IsFunctionCall(name string) *RunAssert

func (*RunAssert) Judge

func (a *RunAssert) Judge(ctx context.Context, evaluator evals.Evaluator, llmInstance llm.LLM) (*RunAssert, error)

type RunContext

type RunContext struct {
	Session      *AgentSession
	SpeechHandle *SpeechHandle
	FunctionCall *llm.FunctionCall
}

func GetRunContext

func GetRunContext(ctx context.Context) *RunContext

func (*RunContext) WaitForPlayout

func (r *RunContext) WaitForPlayout(ctx context.Context) error

type RunResult

type RunResult struct {
	ChatCtx *llm.ChatContext
	Expect  *RunAssert
}

func NewRunResult

func NewRunResult(chatCtx *llm.ChatContext) *RunResult

type SessionReport

type SessionReport struct {
	RecordingOptions        RecordingOptions    `json:"recording_options"`
	JobID                   string              `json:"job_id"`
	RoomID                  string              `json:"room_id"`
	Room                    string              `json:"room"`
	Options                 AgentSessionOptions `json:"options"`
	Events                  []any               `json:"events"`
	ChatHistory             *llm.ChatContext    `json:"chat_history"`
	AudioRecordingPath      *string             `json:"audio_recording_path,omitempty"`
	AudioRecordingStartedAt *float64            `json:"audio_recording_started_at,omitempty"`
	Duration                *float64            `json:"duration,omitempty"`
	StartedAt               *float64            `json:"started_at,omitempty"`
	Timestamp               float64             `json:"timestamp"`
}

func NewSessionReport

func NewSessionReport() *SessionReport

type SpeechCreatedEvent

type SpeechCreatedEvent struct {
	UserInitiated bool
	Source        string // "say" or "generate_reply"
	SpeechHandle  *SpeechHandle
	CreatedAt     time.Time
}

func (*SpeechCreatedEvent) GetType

func (e *SpeechCreatedEvent) GetType() string

type SpeechHandle

type SpeechHandle struct {
	ID                 string
	AllowInterruptions bool
	InputDetails       InputDetails
	Priority           int
	CreatedAt          time.Time
	// contains filtered or unexported fields
}

func NewSpeechHandle

func NewSpeechHandle(allowInterruptions bool, inputDetails InputDetails) *SpeechHandle

func (*SpeechHandle) Interrupt

func (s *SpeechHandle) Interrupt(force bool) error

func (*SpeechHandle) IsDone

func (s *SpeechHandle) IsDone() bool

func (*SpeechHandle) IsInterrupted

func (s *SpeechHandle) IsInterrupted() bool

func (*SpeechHandle) IsScheduled

func (s *SpeechHandle) IsScheduled() bool

func (*SpeechHandle) MarkDone

func (s *SpeechHandle) MarkDone()

func (*SpeechHandle) MarkScheduled

func (s *SpeechHandle) MarkScheduled()

func (*SpeechHandle) Wait

func (s *SpeechHandle) Wait(ctx context.Context) error

type TTSGenerationData

type TTSGenerationData struct {
	AudioCh chan *model.AudioFrame
	TTFB    time.Duration
}

func PerformTTSInference

func PerformTTSInference(ctx context.Context, t tts.TTS, textCh <-chan string) (*TTSGenerationData, error)

type Tagger

type Tagger struct {
	// contains filtered or unexported fields
}

func NewTagger

func NewTagger() *Tagger

func (*Tagger) Add

func (t *Tagger) Add(tag string)

func (*Tagger) Evaluation

func (t *Tagger) Evaluation(result *EvaluationResult)

func (*Tagger) Fail

func (t *Tagger) Fail(reason string)

func (*Tagger) OutcomeReason

func (t *Tagger) OutcomeReason() string

func (*Tagger) Remove

func (t *Tagger) Remove(tag string)

func (*Tagger) Success

func (t *Tagger) Success(reason string)

func (*Tagger) Tags

func (t *Tagger) Tags() []string

type TaskWaiter

type TaskWaiter interface {
	WaitAny(ctx context.Context) (any, error)
}

type ToolExecutionOutput

type ToolExecutionOutput struct {
	FncCall    llm.FunctionCall
	FncCallOut *llm.FunctionCallOutput
	RawOutput  any
	RawError   error
}

type TranscriptSynchronizer

type TranscriptSynchronizer struct {
	// contains filtered or unexported fields
}

TranscriptSynchronizer drip-feeds text to match the playout speed of audio.

func NewTranscriptSynchronizer

func NewTranscriptSynchronizer(speakingRate float64) *TranscriptSynchronizer

NewTranscriptSynchronizer initializes the synchronizer. A typical default speaking rate is about 3.83 syllables per second.

func (*TranscriptSynchronizer) Close

func (s *TranscriptSynchronizer) Close()

func (*TranscriptSynchronizer) EventCh

func (s *TranscriptSynchronizer) EventCh() <-chan string

func (*TranscriptSynchronizer) Interrupt

func (s *TranscriptSynchronizer) Interrupt()

Interrupt immediately flushes the remaining text buffer to the event channel and stops syncing.

func (*TranscriptSynchronizer) PushAudio

func (s *TranscriptSynchronizer) PushAudio(frame *model.AudioFrame)

func (*TranscriptSynchronizer) PushText

func (s *TranscriptSynchronizer) PushText(text string)

type TranscriptionFilter

type TranscriptionFilter struct {
	SpeakingRate float64
}

func NewTranscriptionFilter

func NewTranscriptionFilter() *TranscriptionFilter

type TurnDetectionMode

type TurnDetectionMode string
const (
	TurnDetectionModeSTT         TurnDetectionMode = "stt"
	TurnDetectionModeVAD         TurnDetectionMode = "vad"
	TurnDetectionModeRealtimeLLM TurnDetectionMode = "realtime_llm"
	TurnDetectionModeManual      TurnDetectionMode = "manual"
)

type TurnDetector

type TurnDetector interface {
	PredictEndOfTurn(ctx context.Context, chatCtx *llm.ChatContext) (float64, error)
}

type UserInputTranscribedEvent

type UserInputTranscribedEvent struct {
	Language   string
	Transcript string
	IsFinal    bool
	SpeakerID  string
	CreatedAt  time.Time
}

func (*UserInputTranscribedEvent) GetType

func (e *UserInputTranscribedEvent) GetType() string

type UserState

type UserState string

type UserStateChangedEvent

type UserStateChangedEvent struct {
	OldState UserState
	NewState UserState
}

type VoiceActivityVideoSampler

type VoiceActivityVideoSampler struct {
	// contains filtered or unexported fields
}

VoiceActivityVideoSampler samples video frames at a reduced rate (e.g. 1 fps) only when the user is speaking, to reduce LLM context token usage.

func NewVoiceActivityVideoSampler

func NewVoiceActivityVideoSampler(session *AgentSession, sampleRate float64, opts images.EncodeOptions) *VoiceActivityVideoSampler

func (*VoiceActivityVideoSampler) OnVideoFrame

func (s *VoiceActivityVideoSampler) OnVideoFrame(ctx context.Context, frame *images.VideoFrame) bool

OnVideoFrame should be called for every incoming WebRTC video frame. It returns true if the frame should be forwarded to the LLM.

func (*VoiceActivityVideoSampler) SetSpeaking

func (s *VoiceActivityVideoSampler) SetSpeaking(speaking bool)

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL