Documentation
¶
Index ¶
- Constants
- func PerformToolExecutions(ctx context.Context, functionCh <-chan *llm.FunctionToolCall, ...) <-chan ToolExecutionOutput
- func RegisterPlugin(p Plugin)
- func UploadSessionReport(cloudURL string, apiKey string, apiSecret string, agentName string, ...) error
- func WithRunContext(ctx context.Context, rc *RunContext) context.Context
- type Agent
- func (a *Agent) GetActivity() *AgentActivity
- func (a *Agent) GetAgent() *Agent
- func (a *Agent) OnEnter()
- func (a *Agent) OnExit()
- func (a *Agent) OnUserTurnCompleted(ctx context.Context, chatCtx *llm.ChatContext, newMsg *llm.ChatMessage) error
- func (a *Agent) Start(session *AgentSession, agentIntf AgentInterface) *AgentActivity
- func (a *Agent) UpdateInstructions(ctx context.Context, instructions string) error
- func (a *Agent) UpdateTools(ctx context.Context, tools []llm.Tool) error
- type AgentActivity
- func (a *AgentActivity) OnEndOfSpeech(ev *vad.VADEvent)
- func (a *AgentActivity) OnFinalTranscript(ev *stt.SpeechEvent)
- func (a *AgentActivity) OnStartOfSpeech(ev *vad.VADEvent)
- func (a *AgentActivity) ScheduleSpeech(speech *SpeechHandle, priority int, force bool) error
- func (a *AgentActivity) Start()
- func (a *AgentActivity) Stop()
- type AgentInterface
- type AgentSession
- func (s *AgentSession) GenerateReply(ctx context.Context, userInput string) error
- func (s *AgentSession) GetAgentTrackSID() string
- func (s *AgentSession) OnAudioFrame(ctx context.Context, frame *model.AudioFrame)
- func (s *AgentSession) PublishAgentTranscript(text string)
- func (s *AgentSession) PublishUserTranscript(text string)
- func (s *AgentSession) SetAgentTrackSID(sid string)
- func (s *AgentSession) SetRemoteTrackSID(sid string)
- func (s *AgentSession) SetRemoteUserIdentity(identity string)
- func (s *AgentSession) Start(ctx context.Context) error
- func (s *AgentSession) Stop(ctx context.Context) error
- func (s *AgentSession) UpdateAgentState(state AgentState)
- func (s *AgentSession) UpdateUserState(state UserState)
- type AgentSessionOptions
- type AgentState
- type AgentStateChangedEvent
- type AgentTask
- type AudioConfig
- type AudioRecognition
- type AudioSource
- type Avatar
- type AvatarIO
- type AvatarRunner
- type AvatarState
- type BackgroundAudioPlayer
- type BuiltinAudioClip
- type ClientEventPayload
- type ClientEventsDispatcher
- type CloseEvent
- type CloseReason
- type ConversationItemAddedEvent
- type DataStreamIO
- type DtmfEvent
- type EndOfTurnInfo
- type EvaluationResult
- type Event
- type IVRActivity
- type InputDetails
- type LLMGenerationData
- type LLMTurnDetector
- type MetricsCollectedEvent
- type MultimodalAgent
- type PipelineAgent
- type PlayHandle
- type Plugin
- type QueueIO
- type RecognitionHooks
- type RecordingOptions
- type RunAssert
- type RunContext
- type RunResult
- type SessionReport
- type SpeechCreatedEvent
- type SpeechHandle
- func (s *SpeechHandle) Interrupt(force bool) error
- func (s *SpeechHandle) IsDone() bool
- func (s *SpeechHandle) IsInterrupted() bool
- func (s *SpeechHandle) IsScheduled() bool
- func (s *SpeechHandle) MarkDone()
- func (s *SpeechHandle) MarkScheduled()
- func (s *SpeechHandle) Wait(ctx context.Context) error
- type TTSGenerationData
- type Tagger
- type TaskWaiter
- type ToolExecutionOutput
- type TranscriptSynchronizer
- type TranscriptionFilter
- type TurnDetectionMode
- type TurnDetector
- type UserInputTranscribedEvent
- type UserState
- type UserStateChangedEvent
- type VoiceActivityVideoSampler
Constants ¶
const ( UserStateSpeaking UserState = "speaking" UserStateListening UserState = "listening" UserStateAway UserState = "away" AgentStateInitializing AgentState = "initializing" AgentStateIdle AgentState = "idle" AgentStateListening AgentState = "listening" AgentStateThinking AgentState = "thinking" AgentStateSpeaking AgentState = "speaking" )
const ( SpeechPriorityLow = 0 SpeechPriorityNormal = 5 SpeechPriorityHigh = 10 InterruptionTimeout = 5 * time.Second )
Variables ¶
This section is empty.
Functions ¶
func PerformToolExecutions ¶
func PerformToolExecutions( ctx context.Context, functionCh <-chan *llm.FunctionToolCall, toolCtx *llm.ToolContext, ) <-chan ToolExecutionOutput
func RegisterPlugin ¶
func RegisterPlugin(p Plugin)
func UploadSessionReport ¶
func WithRunContext ¶
func WithRunContext(ctx context.Context, rc *RunContext) context.Context
Types ¶
type Agent ¶
type Agent struct {
ID string
Instructions string
ChatCtx *llm.ChatContext
Tools []llm.Tool
TurnDetection TurnDetectionMode
TurnDetector TurnDetector
STT stt.STT
VAD vad.VAD
LLM llm.LLM
TTS tts.TTS
AllowInterruptions bool
MinConsecutiveSpeechDelay float64
UseTTSAlignedTranscript bool
MinEndpointingDelay float64
MaxEndpointingDelay float64
// contains filtered or unexported fields
}
func (*Agent) GetActivity ¶
func (a *Agent) GetActivity() *AgentActivity
func (*Agent) OnUserTurnCompleted ¶
func (a *Agent) OnUserTurnCompleted(ctx context.Context, chatCtx *llm.ChatContext, newMsg *llm.ChatMessage) error
func (*Agent) Start ¶
func (a *Agent) Start(session *AgentSession, agentIntf AgentInterface) *AgentActivity
func (*Agent) UpdateInstructions ¶
type AgentActivity ¶
type AgentActivity struct {
AgentIntf AgentInterface
Agent *Agent
Session *AgentSession
// contains filtered or unexported fields
}
AgentActivity handles the internal event loops, I/O processing, and speech generation queue for an Agent.
func NewAgentActivity ¶
func NewAgentActivity(agentIntf AgentInterface, session *AgentSession) *AgentActivity
func (*AgentActivity) OnEndOfSpeech ¶
func (a *AgentActivity) OnEndOfSpeech(ev *vad.VADEvent)
func (*AgentActivity) OnFinalTranscript ¶
func (a *AgentActivity) OnFinalTranscript(ev *stt.SpeechEvent)
func (*AgentActivity) OnStartOfSpeech ¶
func (a *AgentActivity) OnStartOfSpeech(ev *vad.VADEvent)
OnStartOfSpeech is an event callback from RecognitionHooks.
func (*AgentActivity) ScheduleSpeech ¶
func (a *AgentActivity) ScheduleSpeech(speech *SpeechHandle, priority int, force bool) error
func (*AgentActivity) Start ¶
func (a *AgentActivity) Start()
func (*AgentActivity) Stop ¶
func (a *AgentActivity) Stop()
type AgentInterface ¶
type AgentInterface interface {
OnEnter()
OnExit()
OnUserTurnCompleted(ctx context.Context, chatCtx *llm.ChatContext, newMsg *llm.ChatMessage) error
GetAgent() *Agent
GetActivity() *AgentActivity
}
type AgentSession ¶
type AgentSession struct {
Options AgentSessionOptions
ChatCtx *llm.ChatContext
Agent AgentInterface
STT stt.STT
VAD vad.VAD
LLM llm.LLM
TTS tts.TTS
Tools []llm.Tool
Assistant *PipelineAgent
Room *lksdk.Room
MetricsCollector *telemetry.UsageCollector
UserState UserState
AgentState AgentState
// Transcript attribution — set by RoomIO when tracks are established.
RemoteUserIdentity string
RemoteTrackSID string
AgentTrackSID string
// Event channels
AgentStateChangedCh chan AgentStateChangedEvent
UserStateChangedCh chan UserStateChangedEvent
// contains filtered or unexported fields
}
func NewAgentSession ¶
func NewAgentSession(agent AgentInterface, room *lksdk.Room, opts AgentSessionOptions) *AgentSession
func (*AgentSession) GenerateReply ¶
func (s *AgentSession) GenerateReply(ctx context.Context, userInput string) error
func (*AgentSession) GetAgentTrackSID ¶
func (s *AgentSession) GetAgentTrackSID() string
func (*AgentSession) OnAudioFrame ¶
func (s *AgentSession) OnAudioFrame(ctx context.Context, frame *model.AudioFrame)
func (*AgentSession) PublishAgentTranscript ¶
func (s *AgentSession) PublishAgentTranscript(text string)
PublishAgentTranscript publishes the agent's LLM response to the Playground. It sends both a ChatMessage (chat panel) and a Transcription packet (transcript overlay).
func (*AgentSession) PublishUserTranscript ¶
func (s *AgentSession) PublishUserTranscript(text string)
PublishUserTranscript publishes the user's STT transcript to the Playground. It sends both a ChatMessage (chat panel) and a Transcription packet (transcript overlay).
func (*AgentSession) SetAgentTrackSID ¶
func (s *AgentSession) SetAgentTrackSID(sid string)
func (*AgentSession) SetRemoteTrackSID ¶
func (s *AgentSession) SetRemoteTrackSID(sid string)
func (*AgentSession) SetRemoteUserIdentity ¶
func (s *AgentSession) SetRemoteUserIdentity(identity string)
func (*AgentSession) UpdateAgentState ¶
func (s *AgentSession) UpdateAgentState(state AgentState)
func (*AgentSession) UpdateUserState ¶
func (s *AgentSession) UpdateUserState(state UserState)
type AgentSessionOptions ¶
type AgentSessionOptions struct {
AllowInterruptions bool
DiscardAudioIfUninterruptible bool
MinInterruptionDuration float64
MinInterruptionWords int
MinEndpointingDelay float64
MaxEndpointingDelay float64
MaxToolSteps int
UserAwayTimeout float64
FalseInterruptionTimeout float64
ResumeFalseInterruption bool
MinConsecutiveSpeechDelay float64
UseTTSAlignedTranscript bool
PreemptiveGeneration bool
AECWarmupDuration float64
}
type AgentState ¶
type AgentState string
type AgentStateChangedEvent ¶
type AgentStateChangedEvent struct {
OldState AgentState
NewState AgentState
}
type AgentTask ¶
AgentTask represents a sub-agent execution that returns a result.
func NewAgentTask ¶
type AudioConfig ¶
type AudioConfig struct {
Source AudioSource
Volume float64
Probability float64
}
type AudioRecognition ¶
type AudioRecognition struct {
// contains filtered or unexported fields
}
func NewAudioRecognition ¶
func NewAudioRecognition(session *AgentSession, hooks RecognitionHooks, s stt.STT, v vad.VAD) *AudioRecognition
func (*AudioRecognition) PushAudio ¶
func (ar *AudioRecognition) PushAudio(frame *model.AudioFrame) error
type AudioSource ¶
type AudioSource interface{} // Can be string, BuiltinAudioClip, or <-chan *model.AudioFrame
type Avatar ¶
type Avatar struct {
State AvatarState
}
type AvatarRunner ¶
type AvatarRunner struct {
// contains filtered or unexported fields
}
AvatarRunner coordinates Avatar IO and LipSync events.
func NewAvatarRunner ¶
func NewAvatarRunner(io AvatarIO) *AvatarRunner
func (*AvatarRunner) SimulateLipSync ¶
func (r *AvatarRunner) SimulateLipSync(text string)
SimulateLipSync takes text (from TranscriptSynchronizer) and simulates basic lip movements.
func (*AvatarRunner) Stop ¶
func (r *AvatarRunner) Stop()
type AvatarState ¶
type AvatarState string
const ( AvatarStateIdle AvatarState = "idle" AvatarStateSpeaking AvatarState = "speaking" )
type BackgroundAudioPlayer ¶
type BackgroundAudioPlayer struct {
// contains filtered or unexported fields
}
func NewBackgroundAudioPlayer ¶
func NewBackgroundAudioPlayer(ambientSound, thinkingSound interface{}) *BackgroundAudioPlayer
func (*BackgroundAudioPlayer) AgentStateChanged ¶
func (p *BackgroundAudioPlayer) AgentStateChanged(newState AgentState)
func (*BackgroundAudioPlayer) Close ¶
func (p *BackgroundAudioPlayer) Close() error
func (*BackgroundAudioPlayer) Play ¶
func (p *BackgroundAudioPlayer) Play(audio interface{}, loop bool) *PlayHandle
func (*BackgroundAudioPlayer) Start ¶
func (p *BackgroundAudioPlayer) Start(room *lksdk.Room, agentSession *AgentSession) error
type BuiltinAudioClip ¶
type BuiltinAudioClip string
const ( CityAmbience BuiltinAudioClip = "city-ambience.ogg" ForestAmbience BuiltinAudioClip = "forest-ambience.ogg" OfficeAmbience BuiltinAudioClip = "office-ambience.ogg" CrowdedRoom BuiltinAudioClip = "crowded-room.ogg" KeyboardTyping BuiltinAudioClip = "keyboard-typing.ogg" KeyboardTyping2 BuiltinAudioClip = "keyboard-typing2.ogg" HoldMusic BuiltinAudioClip = "hold_music.ogg" )
func (BuiltinAudioClip) Path ¶
func (b BuiltinAudioClip) Path() string
type ClientEventPayload ¶
type ClientEventsDispatcher ¶
type ClientEventsDispatcher struct {
// contains filtered or unexported fields
}
ClientEventsDispatcher manages sending Agent states to the LiveKit Room DataChannel.
func NewClientEventsDispatcher ¶
func NewClientEventsDispatcher(room *lksdk.Room) *ClientEventsDispatcher
func (*ClientEventsDispatcher) DispatchAgentState ¶
func (d *ClientEventsDispatcher) DispatchAgentState(state AgentState)
DispatchAgentState emits AgentStateIdle, AgentStateThinking, or AgentStateSpeaking.
func (*ClientEventsDispatcher) DispatchUserState ¶
func (d *ClientEventsDispatcher) DispatchUserState(state UserState)
DispatchUserState emits UserStateListening or UserStateSpeaking.
type CloseEvent ¶
type CloseEvent struct {
Reason CloseReason
Error error
CreatedAt time.Time
}
func (*CloseEvent) GetType ¶
func (e *CloseEvent) GetType() string
type CloseReason ¶
type CloseReason string
const ( CloseReasonError CloseReason = "error" CloseReasonJobShutdown CloseReason = "job_shutdown" CloseReasonParticipantDisconnected CloseReason = "participant_disconnected" CloseReasonUserInitiated CloseReason = "user_initiated" )
type ConversationItemAddedEvent ¶
func (*ConversationItemAddedEvent) GetType ¶
func (e *ConversationItemAddedEvent) GetType() string
type DataStreamIO ¶
type DataStreamIO struct {
// contains filtered or unexported fields
}
func NewDataStreamIO ¶
func NewDataStreamIO(room *lksdk.Room) *DataStreamIO
func (*DataStreamIO) SendAvatarData ¶
func (io *DataStreamIO) SendAvatarData(ctx context.Context, data []byte) error
type EndOfTurnInfo ¶
type EvaluationResult ¶
type IVRActivity ¶
type IVRActivity struct {
AgentIntf AgentInterface
Agent *Agent
// contains filtered or unexported fields
}
func NewIVRActivity ¶
func NewIVRActivity(agentIntf AgentInterface) *IVRActivity
func (*IVRActivity) OnDtmf ¶
func (i *IVRActivity) OnDtmf(digit string)
func (*IVRActivity) SetDigitCallback ¶
func (*IVRActivity) Start ¶
func (i *IVRActivity) Start()
func (*IVRActivity) Stop ¶
func (i *IVRActivity) Stop()
type InputDetails ¶
type InputDetails struct {
Modality string
}
func DefaultInputDetails ¶
func DefaultInputDetails() InputDetails
type LLMGenerationData ¶
type LLMGenerationData struct {
TextCh chan string
FunctionCh chan *llm.FunctionToolCall
FullTextCh chan string // receives the complete assembled text when streaming is done
Usage *llm.CompletionUsage
}
func PerformLLMInference ¶
func PerformLLMInference(ctx context.Context, l llm.LLM, chatCtx *llm.ChatContext, tools []llm.Tool) (*LLMGenerationData, error)
type LLMTurnDetector ¶
type LLMTurnDetector struct {
// contains filtered or unexported fields
}
LLMTurnDetector uses an LLM to predict if the user has finished speaking. It sends the recent conversation history to the LLM and asks for a probability score.
func NewLLMTurnDetector ¶
func NewLLMTurnDetector(llmInstance llm.LLM) *LLMTurnDetector
func (*LLMTurnDetector) PredictEndOfTurn ¶
func (m *LLMTurnDetector) PredictEndOfTurn(ctx context.Context, chatCtx *llm.ChatContext) (float64, error)
type MetricsCollectedEvent ¶
type MetricsCollectedEvent struct {
Metrics telemetry.AgentMetrics
CreatedAt time.Time
}
func (*MetricsCollectedEvent) GetType ¶
func (e *MetricsCollectedEvent) GetType() string
type MultimodalAgent ¶
type MultimodalAgent struct {
PublishAudio func(frame *model.AudioFrame) error
// contains filtered or unexported fields
}
func NewMultimodalAgent ¶
func NewMultimodalAgent( m llm.RealtimeModel, chatCtx *llm.ChatContext, ) *MultimodalAgent
func (*MultimodalAgent) OnAudioFrame ¶
func (ma *MultimodalAgent) OnAudioFrame(ctx context.Context, frame *model.AudioFrame)
func (*MultimodalAgent) Start ¶
func (ma *MultimodalAgent) Start(ctx context.Context, s *AgentSession) error
type PipelineAgent ¶
type PipelineAgent struct {
LLM llm.LLM
PublishAudio func(frame *model.AudioFrame) error
// contains filtered or unexported fields
}
func NewPipelineAgent ¶
func (*PipelineAgent) OnAudioFrame ¶
func (va *PipelineAgent) OnAudioFrame(ctx context.Context, frame *model.AudioFrame)
func (*PipelineAgent) Start ¶
func (va *PipelineAgent) Start(ctx context.Context, s *AgentSession) error
type PlayHandle ¶
type PlayHandle struct {
// contains filtered or unexported fields
}
func (*PlayHandle) Done ¶
func (h *PlayHandle) Done() bool
func (*PlayHandle) Stop ¶
func (h *PlayHandle) Stop()
func (*PlayHandle) WaitForPlayout ¶
func (h *PlayHandle) WaitForPlayout()
type Plugin ¶
func RegisteredPlugins ¶
func RegisteredPlugins() []Plugin
type QueueIO ¶
type QueueIO struct {
// contains filtered or unexported fields
}
func NewQueueIO ¶
func NewQueueIO() *QueueIO
type RecognitionHooks ¶
type RecordingOptions ¶
type RunAssert ¶
type RunAssert struct {
ChatCtx *llm.ChatContext
// contains filtered or unexported fields
}
func (*RunAssert) ContainsMessage ¶
func (*RunAssert) IsFunctionCall ¶
type RunContext ¶
type RunContext struct {
Session *AgentSession
SpeechHandle *SpeechHandle
FunctionCall *llm.FunctionCall
}
func GetRunContext ¶
func GetRunContext(ctx context.Context) *RunContext
func (*RunContext) WaitForPlayout ¶
func (r *RunContext) WaitForPlayout(ctx context.Context) error
type RunResult ¶
type RunResult struct {
ChatCtx *llm.ChatContext
Expect *RunAssert
}
func NewRunResult ¶
func NewRunResult(chatCtx *llm.ChatContext) *RunResult
type SessionReport ¶
type SessionReport struct {
RecordingOptions RecordingOptions `json:"recording_options"`
JobID string `json:"job_id"`
RoomID string `json:"room_id"`
Room string `json:"room"`
Options AgentSessionOptions `json:"options"`
Events []any `json:"events"`
ChatHistory *llm.ChatContext `json:"chat_history"`
AudioRecordingPath *string `json:"audio_recording_path,omitempty"`
AudioRecordingStartedAt *float64 `json:"audio_recording_started_at,omitempty"`
Duration *float64 `json:"duration,omitempty"`
StartedAt *float64 `json:"started_at,omitempty"`
Timestamp float64 `json:"timestamp"`
}
func NewSessionReport ¶
func NewSessionReport() *SessionReport
type SpeechCreatedEvent ¶
type SpeechCreatedEvent struct {
UserInitiated bool
Source string // "say" or "generate_reply"
SpeechHandle *SpeechHandle
CreatedAt time.Time
}
func (*SpeechCreatedEvent) GetType ¶
func (e *SpeechCreatedEvent) GetType() string
type SpeechHandle ¶
type SpeechHandle struct {
ID string
AllowInterruptions bool
InputDetails InputDetails
Priority int
CreatedAt time.Time
// contains filtered or unexported fields
}
func NewSpeechHandle ¶
func NewSpeechHandle(allowInterruptions bool, inputDetails InputDetails) *SpeechHandle
func (*SpeechHandle) Interrupt ¶
func (s *SpeechHandle) Interrupt(force bool) error
func (*SpeechHandle) IsDone ¶
func (s *SpeechHandle) IsDone() bool
func (*SpeechHandle) IsInterrupted ¶
func (s *SpeechHandle) IsInterrupted() bool
func (*SpeechHandle) IsScheduled ¶
func (s *SpeechHandle) IsScheduled() bool
func (*SpeechHandle) MarkDone ¶
func (s *SpeechHandle) MarkDone()
func (*SpeechHandle) MarkScheduled ¶
func (s *SpeechHandle) MarkScheduled()
type TTSGenerationData ¶
type TTSGenerationData struct {
AudioCh chan *model.AudioFrame
TTFB time.Duration
}
func PerformTTSInference ¶
type Tagger ¶
type Tagger struct {
// contains filtered or unexported fields
}
func (*Tagger) Evaluation ¶
func (t *Tagger) Evaluation(result *EvaluationResult)
func (*Tagger) OutcomeReason ¶
type ToolExecutionOutput ¶
type ToolExecutionOutput struct {
FncCall llm.FunctionCall
FncCallOut *llm.FunctionCallOutput
RawOutput any
RawError error
}
type TranscriptSynchronizer ¶
type TranscriptSynchronizer struct {
// contains filtered or unexported fields
}
TranscriptSynchronizer drip-feeds text to match the playout speed of audio.
func NewTranscriptSynchronizer ¶
func NewTranscriptSynchronizer(speakingRate float64) *TranscriptSynchronizer
NewTranscriptSynchronizer initializes the synchronizer. Default speaking rate is usually ~3.83 syllables/sec.
func (*TranscriptSynchronizer) Close ¶
func (s *TranscriptSynchronizer) Close()
func (*TranscriptSynchronizer) EventCh ¶
func (s *TranscriptSynchronizer) EventCh() <-chan string
func (*TranscriptSynchronizer) Interrupt ¶
func (s *TranscriptSynchronizer) Interrupt()
Interrupt immediately flushes the remaining text buffer to the event channel and stops syncing.
func (*TranscriptSynchronizer) PushAudio ¶
func (s *TranscriptSynchronizer) PushAudio(frame *model.AudioFrame)
func (*TranscriptSynchronizer) PushText ¶
func (s *TranscriptSynchronizer) PushText(text string)
type TranscriptionFilter ¶
type TranscriptionFilter struct {
SpeakingRate float64
}
func NewTranscriptionFilter ¶
func NewTranscriptionFilter() *TranscriptionFilter
type TurnDetectionMode ¶
type TurnDetectionMode string
const ( TurnDetectionModeSTT TurnDetectionMode = "stt" TurnDetectionModeVAD TurnDetectionMode = "vad" TurnDetectionModeRealtimeLLM TurnDetectionMode = "realtime_llm" TurnDetectionModeManual TurnDetectionMode = "manual" )
type TurnDetector ¶
type UserInputTranscribedEvent ¶
type UserInputTranscribedEvent struct {
Language string
Transcript string
IsFinal bool
SpeakerID string
CreatedAt time.Time
}
func (*UserInputTranscribedEvent) GetType ¶
func (e *UserInputTranscribedEvent) GetType() string
type UserStateChangedEvent ¶
type VoiceActivityVideoSampler ¶
type VoiceActivityVideoSampler struct {
// contains filtered or unexported fields
}
VoiceActivityVideoSampler samples video frames at a reduced rate (e.g. 1 fps) only when the user is speaking, to reduce LLM context token usage.
func NewVoiceActivityVideoSampler ¶
func NewVoiceActivityVideoSampler(session *AgentSession, sampleRate float64, opts images.EncodeOptions) *VoiceActivityVideoSampler
func (*VoiceActivityVideoSampler) OnVideoFrame ¶
func (s *VoiceActivityVideoSampler) OnVideoFrame(ctx context.Context, frame *images.VideoFrame) bool
OnVideoFrame should be called for every incoming WebRTC video frame. It returns true if the frame should be forwarded to the LLM.
func (*VoiceActivityVideoSampler) SetSpeaking ¶
func (s *VoiceActivityVideoSampler) SetSpeaking(speaking bool)