Documentation
¶
Index ¶
- Constants
- func DefaultTextInputCallback(s *AgentSession, ev TextInputEvent) error
- func GetOutput[T any](ctx context.Context, r *RunResult[T]) (T, error)
- func PerformToolExecutions(ctx context.Context, functionCh <-chan *llm.FunctionToolCall, ...) <-chan ToolExecutionOutput
- func RegisterPlugin(p Plugin)
- func UploadSessionReport(cloudURL string, apiKey string, apiSecret string, agentName string, ...) error
- func WithRunContext(ctx context.Context, rc *RunContext) context.Context
- type AVSynchronizer
- type Agent
- func (a *Agent) GetActivity() *AgentActivity
- func (a *Agent) GetAgent() *Agent
- func (a *Agent) OnEnter(ctx context.Context) error
- func (a *Agent) OnExit(ctx context.Context) error
- func (a *Agent) OnUserTurnCompleted(ctx context.Context, chatCtx *llm.ChatContext, newMsg *llm.ChatMessage) error
- func (a *Agent) Start(session *AgentSession, agentIntf AgentInterface) *AgentActivity
- func (a *Agent) UpdateInstructions(ctx context.Context, instructions string) error
- func (a *Agent) UpdateTools(ctx context.Context, tools []interface{}) error
- type AgentActivity
- func (a *AgentActivity) AClose()
- func (a *AgentActivity) CaptureVideoFrame(frame *model.VideoFrame) error
- func (a *AgentActivity) ClearUserTurn()
- func (a *AgentActivity) CommitUserTurn(opts *CommitUserTurnOpts)
- func (a *AgentActivity) Drain(ctx context.Context) error
- func (a *AgentActivity) Interrupt(force bool) error
- func (a *AgentActivity) OnEndOfSpeech(ev *vad.VADEvent)
- func (a *AgentActivity) OnFinalTranscript(ev *stt.SpeechEvent)
- func (a *AgentActivity) OnInterimTranscript(ev *stt.SpeechEvent)
- func (a *AgentActivity) OnStartOfSpeech(ev *vad.VADEvent)
- func (a *AgentActivity) Pause() error
- func (a *AgentActivity) PauseScheduling()
- func (a *AgentActivity) PushAudio(frame *model.AudioFrame) error
- func (a *AgentActivity) PushVideo(frame *model.VideoFrame) error
- func (a *AgentActivity) Resume() error
- func (a *AgentActivity) ResumeScheduling()
- func (a *AgentActivity) ScheduleSpeech(speech *SpeechHandle, priority int, force bool) error
- func (a *AgentActivity) Start()
- func (a *AgentActivity) Stop()
- func (a *AgentActivity) UpdateOptions(opts AgentSessionOptions)
- type AgentEvent
- type AgentFalseInterruptionEvent
- type AgentHandoffEvent
- type AgentHandoffRunEvent
- type AgentInput
- type AgentInterface
- type AgentOutput
- type AgentSession
- func (s *AgentSession) ClearUserTurn()
- func (s *AgentSession) Close() error
- func (s *AgentSession) CommitUserTurn(opts *CommitUserTurnOpts)
- func (s *AgentSession) GenerateReply(ctx context.Context, userInput string, allowInterruptions bool) (any, error)
- func (s *AgentSession) GetAgentTrackSID() string
- func (s *AgentSession) GetPublisher() interface{ ... }
- func (s *AgentSession) Interrupt(ctx context.Context) error
- func (s *AgentSession) OnAudioFrame(ctx context.Context, frame *model.AudioFrame)
- func (s *AgentSession) Pause() error
- func (s *AgentSession) PublishAgentTranscript(text string)
- func (s *AgentSession) PublishUserTranscript(text string)
- func (s *AgentSession) Resume() error
- func (s *AgentSession) Say(text string, allowInterruptions bool) (*SpeechHandle, error)
- func (s *AgentSession) SetAgentTrackSID(sid string)
- func (s *AgentSession) SetAudioOutput(out AudioOutput)
- func (s *AgentSession) SetRemoteTrackSID(sid string)
- func (s *AgentSession) SetRemoteUserIdentity(identity string)
- func (s *AgentSession) SetRoom(room *lksdk.Room)
- func (s *AgentSession) SetVideoOutput(out VideoOutput)
- func (s *AgentSession) Start(ctx context.Context) error
- func (s *AgentSession) Stop(ctx context.Context) error
- func (s *AgentSession) TimelineSnapshot() []*AgentEvent
- func (s *AgentSession) UpdateAgent(agent AgentInterface, opts *UpdateAgentOpts) error
- func (s *AgentSession) UpdateAgentState(state AgentState)
- func (s *AgentSession) UpdateOptions(opts AgentSessionOptions)
- func (s *AgentSession) UpdateUserState(state UserState)
- type AgentSessionOptions
- type AgentState
- type AgentStateChangedEvent
- type AgentTask
- type AudioConfig
- type AudioInput
- type AudioOutput
- type AudioReceiver
- type AudioRecognition
- type AudioSource
- type AvatarIO
- type AvatarOptions
- type AvatarRunner
- type BackgroundAudioPlayer
- type BuiltinAudioClip
- type ChatMessageRunEvent
- type ClientEventPayload
- type ClientEventsDispatcher
- type CloseEvent
- type CloseReason
- type CommitUserTurnOpts
- type ConversationItemAddedEvent
- type DataStreamIO
- type DtmfEvent
- type EndOfTurnInfo
- type ErrorEvent
- type EvaluationResult
- type Event
- type EventTimeline
- type FunctionCallOutputRunEvent
- type FunctionCallRunEvent
- type FunctionToolsExecutedEvent
- type GenerateReplyOpts
- type GetAgentInfoResponse
- type GetChatHistoryResponse
- type GetSessionStateResponse
- type IVRActivity
- type InputDetails
- type JobResult
- type LLMGenerationData
- type LLMNodeFunc
- type LLMTurnDetector
- type MediaPublisher
- type MetricsCollectedEvent
- type MultimodalAgent
- type ParticipantActiveEvent
- type ParticipantReference
- type PipelineAgent
- type PlayHandle
- type PlaybackFinishedEvent
- type PlaybackStartedEvent
- type Plugin
- type QueueIO
- type RealtimeAudioOutputNodeFunc
- type RecognitionHooks
- type RecordingOptions
- type RunAssert
- type RunContext
- type RunEvent
- type RunResult
- func (r *RunResult[T]) AddEvent(ev RunEvent)
- func (r *RunResult[T]) Done() <-chan struct{}
- func (r *RunResult[T]) Eval(ctx context.Context, evaluator evals.Evaluator, llmInstance llm.LLM) (*evals.JudgmentResult, error)
- func (r *RunResult[T]) GetEvents() []RunEvent
- func (r *RunResult[T]) Wait(ctx context.Context) error
- func (r *RunResult[T]) WaitAny(ctx context.Context) (T, error)
- func (r *RunResult[T]) WatchHandle(ctx context.Context, handle *SpeechHandle)
- func (r *RunResult[T]) WatchTask(done <-chan struct{})
- type RunResultInterface
- type STTNodeFunc
- type SendMessageRequest
- type SendMessageResponse
- type SessionInfo
- type SessionReport
- type SpeechCreatedEvent
- type SpeechHandle
- func (s *SpeechHandle) Error() error
- func (s *SpeechHandle) Interrupt(force bool) error
- func (s *SpeechHandle) IsDone() bool
- func (s *SpeechHandle) IsInterrupted() bool
- func (s *SpeechHandle) IsScheduled() bool
- func (s *SpeechHandle) MarkDone()
- func (s *SpeechHandle) MarkDoneWithError(err error)
- func (s *SpeechHandle) MarkScheduled()
- func (s *SpeechHandle) Wait(ctx context.Context) error
- type StreamRequest
- type StreamResponse
- type SyncEvent
- type SyncedAudioOutput
- func (s *SyncedAudioOutput) CaptureFrame(frame *model.AudioFrame) error
- func (s *SyncedAudioOutput) ClearBuffer()
- func (s *SyncedAudioOutput) Flush()
- func (s *SyncedAudioOutput) Label() string
- func (s *SyncedAudioOutput) OnAttached()
- func (s *SyncedAudioOutput) OnDetached()
- func (s *SyncedAudioOutput) OnPlaybackFinished(f func(ev PlaybackFinishedEvent))
- func (s *SyncedAudioOutput) OnPlaybackStarted(f func(ev PlaybackStartedEvent))
- func (s *SyncedAudioOutput) Pause()
- func (s *SyncedAudioOutput) Resume()
- func (s *SyncedAudioOutput) WaitForPlayout(ctx context.Context) error
- type SyncedTextOutput
- type TTSGenerationData
- type TTSNodeFunc
- type Tagger
- type TaskWaiter
- type TextInput
- type TextInputCallback
- type TextInputEvent
- type TextOutput
- type ToolExecutionOutput
- type TranscriptSynchronizer
- func (s *TranscriptSynchronizer) Close()
- func (s *TranscriptSynchronizer) EventCh() <-chan SyncEvent
- func (s *TranscriptSynchronizer) Interrupt()
- func (s *TranscriptSynchronizer) PushAudio(frame *model.AudioFrame)
- func (s *TranscriptSynchronizer) PushText(text string)
- func (s *TranscriptSynchronizer) RotateSegment()
- func (s *TranscriptSynchronizer) SetSegmentID(id string)
- type TranscriptionFilter
- type TranscriptionNodeFunc
- type TransitionActivityAction
- type TurnDetectionMode
- type TurnDetector
- type UpdateAgentOpts
- type UserInputTranscribedEvent
- type UserState
- type UserStateChangedEvent
- type VideoGenerator
- type VideoInput
- type VideoNodeFunc
- type VideoOutput
- type VoiceActivityVideoSampler
Constants ¶
const (
TopicAgentRequest = "lk.agent.request"
TopicAgentResponse = "lk.agent.response"
TopicChat = "lk.chat"
)
const (
SpeechPriorityLow = 0
SpeechPriorityNormal = 5
SpeechPriorityHigh = 10
InterruptionTimeout = 5 * time.Second
)
const TopicClientEvents = "lk-agent-client-events"
Variables ¶
This section is empty.
Functions ¶
func DefaultTextInputCallback ¶ added in v0.0.5
func DefaultTextInputCallback(s *AgentSession, ev TextInputEvent) error
func GetOutput ¶ added in v0.0.5
GetOutput returns the strictly typed final output of the run, blocking until completion.
func PerformToolExecutions ¶
func PerformToolExecutions( ctx context.Context, functionCh <-chan *llm.FunctionToolCall, toolCtx *llm.ToolContext, ) <-chan ToolExecutionOutput
func RegisterPlugin ¶
func RegisterPlugin(p Plugin)
func UploadSessionReport ¶
func WithRunContext ¶
func WithRunContext(ctx context.Context, rc *RunContext) context.Context
Types ¶
type AVSynchronizer ¶ added in v0.0.5
type Agent ¶
type Agent struct {
ID string
Instructions string
ChatCtx *llm.ChatContext
Tools []interface{}
TurnDetection TurnDetectionMode
TurnDetector TurnDetector
STT stt.STT
VAD vad.VAD
LLM llm.LLM
TTS tts.TTS
LLMNode LLMNodeFunc
TTSNode TTSNodeFunc
STTNode STTNodeFunc
VideoNode VideoNodeFunc
TranscriptionNode TranscriptionNodeFunc
RealtimeAudioOutputNode RealtimeAudioOutputNodeFunc
AllowInterruptions bool
MinConsecutiveSpeechDelay float64
UseTTSAlignedTranscript bool
MinEndpointingDelay float64
MaxEndpointingDelay float64
// contains filtered or unexported fields
}
func (*Agent) GetActivity ¶
func (a *Agent) GetActivity() *AgentActivity
func (*Agent) OnUserTurnCompleted ¶
func (a *Agent) OnUserTurnCompleted(ctx context.Context, chatCtx *llm.ChatContext, newMsg *llm.ChatMessage) error
func (*Agent) Start ¶
func (a *Agent) Start(session *AgentSession, agentIntf AgentInterface) *AgentActivity
func (*Agent) UpdateInstructions ¶
type AgentActivity ¶
type AgentActivity struct {
AgentIntf AgentInterface
Agent *Agent
Session *AgentSession
// contains filtered or unexported fields
}
AgentActivity handles the internal event loops, I/O processing, and speech generation queue for an Agent.
func NewAgentActivity ¶
func NewAgentActivity(agentIntf AgentInterface, session *AgentSession, parentCtx context.Context) *AgentActivity
func (*AgentActivity) AClose ¶ added in v0.0.5
func (a *AgentActivity) AClose()
func (*AgentActivity) CaptureVideoFrame ¶ added in v0.0.5
func (a *AgentActivity) CaptureVideoFrame(frame *model.VideoFrame) error
func (*AgentActivity) ClearUserTurn ¶ added in v0.0.5
func (a *AgentActivity) ClearUserTurn()
func (*AgentActivity) CommitUserTurn ¶ added in v0.0.5
func (a *AgentActivity) CommitUserTurn(opts *CommitUserTurnOpts)
func (*AgentActivity) Drain ¶ added in v0.0.5
func (a *AgentActivity) Drain(ctx context.Context) error
func (*AgentActivity) Interrupt ¶ added in v0.0.5
func (a *AgentActivity) Interrupt(force bool) error
func (*AgentActivity) OnEndOfSpeech ¶
func (a *AgentActivity) OnEndOfSpeech(ev *vad.VADEvent)
func (*AgentActivity) OnFinalTranscript ¶
func (a *AgentActivity) OnFinalTranscript(ev *stt.SpeechEvent)
func (*AgentActivity) OnInterimTranscript ¶ added in v0.0.5
func (a *AgentActivity) OnInterimTranscript(ev *stt.SpeechEvent)
func (*AgentActivity) OnStartOfSpeech ¶
func (a *AgentActivity) OnStartOfSpeech(ev *vad.VADEvent)
OnStartOfSpeech, OnEndOfSpeech, OnInterimTranscript, and OnFinalTranscript are the event callbacks from RecognitionHooks.
func (*AgentActivity) Pause ¶ added in v0.0.5
func (a *AgentActivity) Pause() error
func (*AgentActivity) PauseScheduling ¶ added in v0.0.5
func (a *AgentActivity) PauseScheduling()
func (*AgentActivity) PushAudio ¶ added in v0.0.5
func (a *AgentActivity) PushAudio(frame *model.AudioFrame) error
func (*AgentActivity) PushVideo ¶ added in v0.0.5
func (a *AgentActivity) PushVideo(frame *model.VideoFrame) error
func (*AgentActivity) Resume ¶ added in v0.0.5
func (a *AgentActivity) Resume() error
func (*AgentActivity) ResumeScheduling ¶ added in v0.0.5
func (a *AgentActivity) ResumeScheduling()
func (*AgentActivity) ScheduleSpeech ¶
func (a *AgentActivity) ScheduleSpeech(speech *SpeechHandle, priority int, force bool) error
func (*AgentActivity) Start ¶
func (a *AgentActivity) Start()
func (*AgentActivity) Stop ¶
func (a *AgentActivity) Stop()
func (*AgentActivity) UpdateOptions ¶ added in v0.0.5
func (a *AgentActivity) UpdateOptions(opts AgentSessionOptions)
type AgentEvent ¶ added in v0.0.5
type AgentEvent struct {
Type string `json:"type"`
Timestamp float64 `json:"timestamp"`
UserStateChanged *UserStateChangedEvent `json:"user_state_changed,omitempty"`
AgentStateChanged *AgentStateChangedEvent `json:"agent_state_changed,omitempty"`
UserInputTranscribed *UserInputTranscribedEvent `json:"user_input_transcribed,omitempty"`
AgentFalseInterruption *AgentFalseInterruptionEvent `json:"agent_false_interruption,omitempty"`
MetricsCollected *MetricsCollectedEvent `json:"metrics_collected,omitempty"`
ConversationItemAdded *ConversationItemAddedEvent `json:"conversation_item_added,omitempty"`
FunctionToolsExecuted *FunctionToolsExecutedEvent `json:"function_tools_executed,omitempty"`
AgentHandoff *AgentHandoffEvent `json:"agent_handoff,omitempty"`
SpeechCreated *SpeechCreatedEvent `json:"speech_created,omitempty"`
Error *ErrorEvent `json:"error,omitempty"`
Close *CloseEvent `json:"close,omitempty"`
ParticipantActive *ParticipantActiveEvent `json:"participant_active,omitempty"`
}
func NewAgentEvent ¶ added in v0.0.5
func NewAgentEvent(ev Event) *AgentEvent
func (*AgentEvent) MarshalJSON ¶ added in v0.0.5
func (ae *AgentEvent) MarshalJSON() ([]byte, error)
func (*AgentEvent) UnmarshalJSON ¶ added in v0.0.5
func (ae *AgentEvent) UnmarshalJSON(data []byte) error
type AgentFalseInterruptionEvent ¶ added in v0.0.5
type AgentFalseInterruptionEvent struct {
Resumed bool `json:"resumed"`
CreatedAt time.Time `json:"created_at"`
}
func (*AgentFalseInterruptionEvent) GetType ¶ added in v0.0.5
func (e *AgentFalseInterruptionEvent) GetType() string
type AgentHandoffEvent ¶ added in v0.0.5
type AgentHandoffEvent struct {
OldAgent AgentInterface `json:"-"`
NewAgent AgentInterface `json:"-"`
OldAgentID string `json:"old_agent_id"`
NewAgentID string `json:"new_agent_id"`
Handoff *llm.AgentHandoff `json:"handoff"`
CreatedAt time.Time `json:"created_at"`
}
func (*AgentHandoffEvent) GetType ¶ added in v0.0.5
func (e *AgentHandoffEvent) GetType() string
type AgentHandoffRunEvent ¶ added in v0.0.5
type AgentHandoffRunEvent struct {
Item *llm.AgentHandoff
OldAgent AgentInterface
NewAgent AgentInterface
}
func (*AgentHandoffRunEvent) GetCreatedAt ¶ added in v0.0.5
func (e *AgentHandoffRunEvent) GetCreatedAt() time.Time
func (*AgentHandoffRunEvent) GetItem ¶ added in v0.0.5
func (e *AgentHandoffRunEvent) GetItem() llm.ChatItem
func (*AgentHandoffRunEvent) RunEventType ¶ added in v0.0.5
func (e *AgentHandoffRunEvent) RunEventType() string
type AgentInput ¶ added in v0.0.5
type AgentInput struct {
Audio AudioInput
Text TextInput
Video VideoInput
}
type AgentInterface ¶
type AgentInterface interface {
OnEnter(ctx context.Context) error
OnExit(ctx context.Context) error
OnUserTurnCompleted(ctx context.Context, chatCtx *llm.ChatContext, newMsg *llm.ChatMessage) error
GetAgent() *Agent
GetActivity() *AgentActivity
}
type AgentOutput ¶ added in v0.0.5
type AgentOutput struct {
Audio AudioOutput
Transcription TextOutput
Video VideoOutput
Publisher MediaPublisher
}
type AgentSession ¶
type AgentSession struct {
Options AgentSessionOptions
ChatCtx *llm.ChatContext
Agent AgentInterface
STT stt.STT
VAD vad.VAD
LLM llm.LLM
TTS tts.TTS
Tools []interface{}
Assistant *PipelineAgent
Room *lksdk.Room
Input AgentInput
Output AgentOutput
MetricsCollector *telemetry.UsageCollector
Timeline *EventTimeline
UserState UserState
AgentState AgentState
// Transcript attribution — set by RoomIO when tracks are established.
RemoteUserIdentity string
RemoteTrackSID string
AgentTrackSID string
Activity *AgentActivity
// Event channels
AgentStateChangedCh chan AgentStateChangedEvent
UserStateChangedCh chan UserStateChangedEvent
// contains filtered or unexported fields
}
func NewAgentSession ¶
func NewAgentSession(agent AgentInterface, room *lksdk.Room, opts AgentSessionOptions) *AgentSession
func (*AgentSession) ClearUserTurn ¶ added in v0.0.5
func (s *AgentSession) ClearUserTurn()
func (*AgentSession) Close ¶ added in v0.0.5
func (s *AgentSession) Close() error
func (*AgentSession) CommitUserTurn ¶ added in v0.0.5
func (s *AgentSession) CommitUserTurn(opts *CommitUserTurnOpts)
func (*AgentSession) GenerateReply ¶
func (*AgentSession) GetAgentTrackSID ¶
func (s *AgentSession) GetAgentTrackSID() string
func (*AgentSession) GetPublisher ¶ added in v0.0.5
func (*AgentSession) Interrupt ¶ added in v0.0.5
func (s *AgentSession) Interrupt(ctx context.Context) error
func (*AgentSession) OnAudioFrame ¶
func (s *AgentSession) OnAudioFrame(ctx context.Context, frame *model.AudioFrame)
func (*AgentSession) Pause ¶ added in v0.0.5
func (s *AgentSession) Pause() error
func (*AgentSession) PublishAgentTranscript ¶
func (s *AgentSession) PublishAgentTranscript(text string)
PublishAgentTranscript publishes the agent's LLM response to the Playground. Sends both a ChatMessage (chat panel) and a Transcription packet (transcript overlay).
func (*AgentSession) PublishUserTranscript ¶
func (s *AgentSession) PublishUserTranscript(text string)
PublishUserTranscript publishes the user's STT transcript to the Playground. Sends both a ChatMessage (chat panel) and a Transcription packet (transcript overlay).
func (*AgentSession) Resume ¶ added in v0.0.5
func (s *AgentSession) Resume() error
func (*AgentSession) Say ¶ added in v0.0.5
func (s *AgentSession) Say(text string, allowInterruptions bool) (*SpeechHandle, error)
func (*AgentSession) SetAgentTrackSID ¶
func (s *AgentSession) SetAgentTrackSID(sid string)
func (*AgentSession) SetAudioOutput ¶ added in v0.0.5
func (s *AgentSession) SetAudioOutput(out AudioOutput)
func (*AgentSession) SetRemoteTrackSID ¶
func (s *AgentSession) SetRemoteTrackSID(sid string)
func (*AgentSession) SetRemoteUserIdentity ¶
func (s *AgentSession) SetRemoteUserIdentity(identity string)
func (*AgentSession) SetRoom ¶ added in v0.0.5
func (s *AgentSession) SetRoom(room *lksdk.Room)
SetRoom wires the LiveKit room to the session after connection. This initialises the ClientEventsDispatcher (RPC handlers, state broadcasting) so the Playground can discover the agent's audio track and state.
func (*AgentSession) SetVideoOutput ¶ added in v0.0.5
func (s *AgentSession) SetVideoOutput(out VideoOutput)
func (*AgentSession) TimelineSnapshot ¶ added in v0.0.5
func (s *AgentSession) TimelineSnapshot() []*AgentEvent
func (*AgentSession) UpdateAgent ¶ added in v0.0.5
func (s *AgentSession) UpdateAgent(agent AgentInterface, opts *UpdateAgentOpts) error
func (*AgentSession) UpdateAgentState ¶
func (s *AgentSession) UpdateAgentState(state AgentState)
func (*AgentSession) UpdateOptions ¶ added in v0.0.5
func (s *AgentSession) UpdateOptions(opts AgentSessionOptions)
func (*AgentSession) UpdateUserState ¶
func (s *AgentSession) UpdateUserState(state UserState)
type AgentSessionOptions ¶
type AgentSessionOptions struct {
AllowInterruptions bool
DiscardAudioIfUninterruptible bool
MinInterruptionDuration float64
MinInterruptionWords int
MinEndpointingDelay float64
MaxEndpointingDelay float64
MaxToolSteps int
UserAwayTimeout float64
FalseInterruptionTimeout float64
ResumeFalseInterruption bool
MinConsecutiveSpeechDelay float64
UseTTSAlignedTranscript bool
PreemptiveGeneration bool
AECWarmupDuration float64
SpeakingRate float64
TranscriptRefreshRate time.Duration
LinkedParticipant lksdk.Participant
IVRDetection bool
}
type AgentState ¶
type AgentState string
const (
AgentStateInitializing AgentState = "initializing"
AgentStateIdle AgentState = "idle"
AgentStateListening AgentState = "listening"
AgentStateThinking AgentState = "thinking"
AgentStateSpeaking AgentState = "speaking"
)
type AgentStateChangedEvent ¶
type AgentStateChangedEvent struct {
OldState AgentState `json:"old_state"`
NewState AgentState `json:"new_state"`
CreatedAt time.Time `json:"created_at"`
}
func (*AgentStateChangedEvent) GetType ¶ added in v0.0.5
func (e *AgentStateChangedEvent) GetType() string
type AgentTask ¶
AgentTask represents a sub-agent execution that returns a result.
func NewAgentTask ¶
type AudioConfig ¶
type AudioConfig struct {
Source AudioSource
Volume float64
Probability float64
}
type AudioInput ¶ added in v0.0.5
type AudioInput interface {
Label() string
Stream() <-chan *model.AudioFrame
OnAttached()
OnDetached()
}
AudioInput represents a source of audio frames (e.g., a microphone or a remote track).
type AudioOutput ¶ added in v0.0.5
type AudioOutput interface {
Label() string
CaptureFrame(frame *model.AudioFrame) error
Flush()
WaitForPlayout(ctx context.Context) error
ClearBuffer()
OnAttached()
OnDetached()
Pause()
Resume()
OnPlaybackStarted(func(ev PlaybackStartedEvent))
OnPlaybackFinished(func(ev PlaybackFinishedEvent))
}
AudioOutput represents a destination for audio frames (e.g., speakers or a remote track).
type AudioReceiver ¶ added in v0.0.5
type AudioRecognition ¶
type AudioRecognition struct {
// contains filtered or unexported fields
}
func NewAudioRecognition ¶
func NewAudioRecognition(session *AgentSession, hooks RecognitionHooks, s stt.STT, v vad.VAD) *AudioRecognition
func (*AudioRecognition) Close ¶ added in v0.0.5
func (ar *AudioRecognition) Close()
func (*AudioRecognition) Flush ¶ added in v0.0.5
func (ar *AudioRecognition) Flush() error
func (*AudioRecognition) PushAudio ¶
func (ar *AudioRecognition) PushAudio(frame *model.AudioFrame) error
type AudioSource ¶
type AudioSource interface{} // Can be string, BuiltinAudioClip, or <-chan *model.AudioFrame
type AvatarOptions ¶ added in v0.0.5
type AvatarRunner ¶
type AvatarRunner struct {
// contains filtered or unexported fields
}
AvatarRunner coordinates Avatar IO and LipSync events.
func NewAvatarRunner ¶
func NewAvatarRunner(room *lksdk.Room, audioRecv AudioReceiver, videoGen VideoGenerator, opts AvatarOptions, avSync AVSynchronizer, lazyPublish bool) *AvatarRunner
func (*AvatarRunner) SendLipSyncEvent ¶ added in v0.0.5
func (r *AvatarRunner) SendLipSyncEvent(ctx context.Context, data []byte) error
func (*AvatarRunner) Stop ¶
func (r *AvatarRunner) Stop()
type BackgroundAudioPlayer ¶
type BackgroundAudioPlayer struct {
// contains filtered or unexported fields
}
func NewBackgroundAudioPlayer ¶
func NewBackgroundAudioPlayer(ambientSound, thinkingSound interface{}) *BackgroundAudioPlayer
func (*BackgroundAudioPlayer) AgentStateChanged ¶
func (p *BackgroundAudioPlayer) AgentStateChanged(newState AgentState)
func (*BackgroundAudioPlayer) Close ¶
func (p *BackgroundAudioPlayer) Close() error
func (*BackgroundAudioPlayer) Play ¶
func (p *BackgroundAudioPlayer) Play(audio interface{}, loop bool) *PlayHandle
func (*BackgroundAudioPlayer) Start ¶
func (p *BackgroundAudioPlayer) Start(room *lksdk.Room, agentSession *AgentSession) error
type BuiltinAudioClip ¶
type BuiltinAudioClip string
const (
CityAmbience BuiltinAudioClip = "city-ambience.ogg"
ForestAmbience BuiltinAudioClip = "forest-ambience.ogg"
OfficeAmbience BuiltinAudioClip = "office-ambience.ogg"
CrowdedRoom BuiltinAudioClip = "crowded-room.ogg"
KeyboardTyping BuiltinAudioClip = "keyboard-typing.ogg"
KeyboardTyping2 BuiltinAudioClip = "keyboard-typing2.ogg"
HoldMusic BuiltinAudioClip = "hold_music.ogg"
)
func (BuiltinAudioClip) Path ¶
func (b BuiltinAudioClip) Path() string
type ChatMessageRunEvent ¶ added in v0.0.5
type ChatMessageRunEvent struct {
Item *llm.ChatMessage
}
func (*ChatMessageRunEvent) GetCreatedAt ¶ added in v0.0.5
func (e *ChatMessageRunEvent) GetCreatedAt() time.Time
func (*ChatMessageRunEvent) GetItem ¶ added in v0.0.5
func (e *ChatMessageRunEvent) GetItem() llm.ChatItem
func (*ChatMessageRunEvent) RunEventType ¶ added in v0.0.5
func (e *ChatMessageRunEvent) RunEventType() string
type ClientEventPayload ¶
type ClientEventsDispatcher ¶
type ClientEventsDispatcher struct {
// contains filtered or unexported fields
}
ClientEventsDispatcher manages sending Agent states to the LiveKit Room DataChannel and handling inbound RPC and DataChannel requests.
func NewClientEventsDispatcher ¶
func NewClientEventsDispatcher(room *lksdk.Room, session *AgentSession) *ClientEventsDispatcher
func (*ClientEventsDispatcher) Close ¶ added in v0.0.5
func (d *ClientEventsDispatcher) Close()
func (*ClientEventsDispatcher) DispatchAgentState ¶
func (d *ClientEventsDispatcher) DispatchAgentState(state AgentState)
DispatchAgentState emits AgentStateIdle, AgentStateThinking, AgentStateSpeaking
func (*ClientEventsDispatcher) DispatchUserState ¶
func (d *ClientEventsDispatcher) DispatchUserState(state UserState)
DispatchUserState emits UserStateListening, UserStateSpeaking
func (*ClientEventsDispatcher) RegisterTextInput ¶ added in v0.0.5
func (d *ClientEventsDispatcher) RegisterTextInput(cb TextInputCallback)
type CloseEvent ¶
type CloseEvent struct {
Reason CloseReason `json:"reason"`
Error error `json:"error,omitempty"`
CreatedAt time.Time `json:"created_at"`
}
func (*CloseEvent) GetType ¶
func (e *CloseEvent) GetType() string
type CloseReason ¶
type CloseReason string
const (
CloseReasonError CloseReason = "error"
CloseReasonJobShutdown CloseReason = "job_shutdown"
CloseReasonParticipantDisconnected CloseReason = "participant_disconnected"
CloseReasonUserInitiated CloseReason = "user_initiated"
CloseReasonTaskCompleted CloseReason = "task_completed"
)
type CommitUserTurnOpts ¶ added in v0.0.5
type ConversationItemAddedEvent ¶
type ConversationItemAddedEvent struct {
Item llm.ChatItem `json:"item"`
CreatedAt time.Time `json:"created_at"`
}
func (*ConversationItemAddedEvent) GetType ¶
func (e *ConversationItemAddedEvent) GetType() string
type DataStreamIO ¶
type DataStreamIO struct {
// contains filtered or unexported fields
}
func NewDataStreamIO ¶
func NewDataStreamIO(room *lksdk.Room) *DataStreamIO
func (*DataStreamIO) SendAvatarData ¶
func (io *DataStreamIO) SendAvatarData(ctx context.Context, data []byte) error
type EndOfTurnInfo ¶
type ErrorEvent ¶ added in v0.0.5
type ErrorEvent struct {
Error error `json:"error"`
Source any `json:"source,omitempty"`
CreatedAt time.Time `json:"created_at"`
}
func (*ErrorEvent) GetType ¶ added in v0.0.5
func (e *ErrorEvent) GetType() string
type EvaluationResult ¶
type EventTimeline ¶ added in v0.0.5
type EventTimeline struct {
OnEvent func(ev *AgentEvent)
// contains filtered or unexported fields
}
func NewEventTimeline ¶ added in v0.0.5
func NewEventTimeline() *EventTimeline
func (*EventTimeline) AddEvent ¶ added in v0.0.5
func (t *EventTimeline) AddEvent(ev Event)
func (*EventTimeline) Clear ¶ added in v0.0.5
func (t *EventTimeline) Clear()
Clear releases all stored events and the OnEvent callback so the timeline (and everything it references) can be garbage-collected.
func (*EventTimeline) Snapshot ¶ added in v0.0.5
func (t *EventTimeline) Snapshot() []*AgentEvent
type FunctionCallOutputRunEvent ¶ added in v0.0.5
type FunctionCallOutputRunEvent struct {
Item *llm.FunctionCallOutput
}
func (*FunctionCallOutputRunEvent) GetCreatedAt ¶ added in v0.0.5
func (e *FunctionCallOutputRunEvent) GetCreatedAt() time.Time
func (*FunctionCallOutputRunEvent) GetItem ¶ added in v0.0.5
func (e *FunctionCallOutputRunEvent) GetItem() llm.ChatItem
func (*FunctionCallOutputRunEvent) RunEventType ¶ added in v0.0.5
func (e *FunctionCallOutputRunEvent) RunEventType() string
type FunctionCallRunEvent ¶ added in v0.0.5
type FunctionCallRunEvent struct {
Item *llm.FunctionCall
}
func (*FunctionCallRunEvent) GetCreatedAt ¶ added in v0.0.5
func (e *FunctionCallRunEvent) GetCreatedAt() time.Time
func (*FunctionCallRunEvent) GetItem ¶ added in v0.0.5
func (e *FunctionCallRunEvent) GetItem() llm.ChatItem
func (*FunctionCallRunEvent) RunEventType ¶ added in v0.0.5
func (e *FunctionCallRunEvent) RunEventType() string
type FunctionToolsExecutedEvent ¶ added in v0.0.5
type FunctionToolsExecutedEvent struct {
FunctionCalls []llm.FunctionCall `json:"function_calls"`
FunctionCallOutputs []*llm.FunctionCallOutput `json:"function_call_outputs"`
CreatedAt time.Time `json:"created_at"`
HasToolReply bool `json:"has_tool_reply"`
HasAgentHandoff bool `json:"has_agent_handoff"`
}
func (*FunctionToolsExecutedEvent) GetType ¶ added in v0.0.5
func (e *FunctionToolsExecutedEvent) GetType() string
type GenerateReplyOpts ¶ added in v0.0.5
type GenerateReplyOpts struct {
AllowInterruptions bool
}
type GetAgentInfoResponse ¶ added in v0.0.5
type GetChatHistoryResponse ¶ added in v0.0.5
type GetSessionStateResponse ¶ added in v0.0.5
type IVRActivity ¶
type IVRActivity struct {
AgentIntf AgentInterface
Agent *Agent
// contains filtered or unexported fields
}
func NewIVRActivity ¶
func NewIVRActivity(agentIntf AgentInterface) *IVRActivity
func (*IVRActivity) OnDtmf ¶
func (i *IVRActivity) OnDtmf(digit string)
func (*IVRActivity) SetDigitCallback ¶
func (*IVRActivity) Start ¶
func (i *IVRActivity) Start()
func (*IVRActivity) Stop ¶
func (i *IVRActivity) Stop()
type InputDetails ¶
type InputDetails struct {
Modality string
}
func DefaultInputDetails ¶
func DefaultInputDetails() InputDetails
type LLMGenerationData ¶
type LLMGenerationData struct {
TextCh chan string
FunctionCh chan *llm.FunctionToolCall
FullTextCh chan string // receives the complete assembled text when streaming is done
GeneratedText string
Usage *llm.CompletionUsage
}
func PerformLLMInference ¶
func PerformLLMInference(ctx context.Context, l llm.LLM, chatCtx *llm.ChatContext, tools []interface{}) (*LLMGenerationData, error)
type LLMNodeFunc ¶ added in v0.0.5
type LLMNodeFunc func(ctx context.Context, l llm.LLM, chatCtx *llm.ChatContext, tools []interface{}) (*LLMGenerationData, error)
type LLMTurnDetector ¶
type LLMTurnDetector struct {
// contains filtered or unexported fields
}
LLMTurnDetector uses an LLM to predict if the user has finished speaking. It sends the recent conversation history to the LLM and asks for a probability score.
func NewLLMTurnDetector ¶
func NewLLMTurnDetector(llmInstance llm.LLM) *LLMTurnDetector
func (*LLMTurnDetector) PredictEndOfTurn ¶
func (m *LLMTurnDetector) PredictEndOfTurn(ctx context.Context, chatCtx *llm.ChatContext) (float64, error)
type MediaPublisher ¶ added in v0.0.5
type MetricsCollectedEvent ¶
type MetricsCollectedEvent struct {
Metrics telemetry.AgentMetrics `json:"metrics"`
CreatedAt time.Time `json:"created_at"`
}
func (*MetricsCollectedEvent) GetType ¶
func (e *MetricsCollectedEvent) GetType() string
type MultimodalAgent ¶
type MultimodalAgent struct {
PublishAudio func(frame *model.AudioFrame) error
// contains filtered or unexported fields
}
func NewMultimodalAgent ¶
func NewMultimodalAgent( m llm.RealtimeModel, chatCtx *llm.ChatContext, ) *MultimodalAgent
func (*MultimodalAgent) OnAudioFrame ¶
func (ma *MultimodalAgent) OnAudioFrame(ctx context.Context, frame *model.AudioFrame)
func (*MultimodalAgent) Start ¶
func (ma *MultimodalAgent) Start(ctx context.Context, s *AgentSession) error
type ParticipantActiveEvent ¶ added in v0.0.5
type ParticipantActiveEvent struct {
ParticipantID string `json:"participant_id"`
Identity string `json:"identity"`
Active bool `json:"active"`
CreatedAt time.Time `json:"created_at"`
}
func (*ParticipantActiveEvent) GetType ¶ added in v0.0.5
func (e *ParticipantActiveEvent) GetType() string
type ParticipantReference ¶ added in v0.0.5
type PipelineAgent ¶
func NewPipelineAgent ¶
func (*PipelineAgent) GenerateReply ¶ added in v0.0.5
func (va *PipelineAgent) GenerateReply(speech *SpeechHandle)
func (*PipelineAgent) OnAudioFrame ¶
func (va *PipelineAgent) OnAudioFrame(ctx context.Context, frame *model.AudioFrame)
func (*PipelineAgent) Start ¶
func (va *PipelineAgent) Start(ctx context.Context, s *AgentSession) error
func (*PipelineAgent) Stop ¶ added in v0.0.5
func (va *PipelineAgent) Stop()
type PlayHandle ¶
type PlayHandle struct {
// contains filtered or unexported fields
}
func (*PlayHandle) Done ¶
func (h *PlayHandle) Done() bool
func (*PlayHandle) Stop ¶
func (h *PlayHandle) Stop()
func (*PlayHandle) WaitForPlayout ¶
func (h *PlayHandle) WaitForPlayout()
type PlaybackFinishedEvent ¶ added in v0.0.5
type PlaybackStartedEvent ¶ added in v0.0.5
type Plugin ¶
func RegisteredPlugins ¶
func RegisteredPlugins() []Plugin
type QueueIO ¶
type QueueIO struct {
// contains filtered or unexported fields
}
func NewQueueIO ¶
func NewQueueIO() *QueueIO
type RealtimeAudioOutputNodeFunc ¶ added in v0.0.5
type RealtimeAudioOutputNodeFunc func(ctx context.Context, audio <-chan *model.AudioFrame) (<-chan *model.AudioFrame, error)
type RecognitionHooks ¶
type RecognitionHooks interface {
OnStartOfSpeech(ev *vad.VADEvent)
OnEndOfSpeech(ev *vad.VADEvent)
OnInterimTranscript(ev *stt.SpeechEvent)
OnFinalTranscript(ev *stt.SpeechEvent)
}
type RecordingOptions ¶
type RunAssert ¶
type RunAssert struct {
ChatCtx *llm.ChatContext
// contains filtered or unexported fields
}
func (*RunAssert) ContainsMessage ¶
func (*RunAssert) IsFunctionCall ¶
type RunContext ¶
type RunContext struct {
Session *AgentSession
SpeechHandle *SpeechHandle
FunctionCall *llm.FunctionCall
}
func GetRunContext ¶
func GetRunContext(ctx context.Context) *RunContext
func (*RunContext) WaitForPlayout ¶
func (r *RunContext) WaitForPlayout(ctx context.Context) error
type RunResult ¶
type RunResult[T any] struct { ChatCtx *llm.ChatContext Timestamp float64 Expect *RunAssert FinalOutput T Events []RunEvent // contains filtered or unexported fields }
func GenerateTypedReply ¶ added in v0.0.5
func GenerateTypedReply[T any](ctx context.Context, s *AgentSession, userInput string, opts *GenerateReplyOpts) (*RunResult[T], error)
func NewRunResult ¶
func NewRunResult[T any](chatCtx *llm.ChatContext) *RunResult[T]
func (*RunResult[T]) WatchHandle ¶ added in v0.0.5
func (r *RunResult[T]) WatchHandle(ctx context.Context, handle *SpeechHandle)
type RunResultInterface ¶ added in v0.0.5
type RunResultInterface interface {
AddEvent(ev RunEvent)
WatchTask(done <-chan struct{})
}
type STTNodeFunc ¶ added in v0.0.5
type STTNodeFunc func(ctx context.Context, s stt.STT, audio <-chan *model.AudioFrame) (<-chan *stt.SpeechEvent, error)
type SendMessageRequest ¶ added in v0.0.5
type SendMessageRequest struct {
Text string `json:"text"`
}
type SendMessageResponse ¶ added in v0.0.5
type SessionInfo ¶ added in v0.0.5
type SessionInfo interface {
LocalParticipantID() string
}
type SessionReport ¶
type SessionReport struct {
RecordingOptions RecordingOptions `json:"recording_options"`
JobID string `json:"job_id"`
RoomID string `json:"room_id"`
Room string `json:"room"`
Options AgentSessionOptions `json:"options"`
Events []any `json:"events"`
Timeline []*AgentEvent `json:"timeline,omitempty"`
ChatHistory *llm.ChatContext `json:"chat_history"`
AudioRecordingPath *string `json:"audio_recording_path,omitempty"`
AudioRecordingStartedAt *float64 `json:"audio_recording_started_at,omitempty"`
Duration *float64 `json:"duration,omitempty"`
StartedAt *float64 `json:"started_at,omitempty"`
Timestamp float64 `json:"timestamp"`
// contains filtered or unexported fields
}
func NewSessionReport ¶
func NewSessionReport() *SessionReport
func (*SessionReport) AddEvent ¶ added in v0.0.5
func (r *SessionReport) AddEvent(event any)
func (*SessionReport) SetChatHistory ¶ added in v0.0.5
func (r *SessionReport) SetChatHistory(chatCtx *llm.ChatContext)
func (*SessionReport) SetTimeline ¶ added in v0.0.5
func (r *SessionReport) SetTimeline(events []*AgentEvent)
func (*SessionReport) ToDict ¶ added in v0.0.5
func (r *SessionReport) ToDict() map[string]any
type SpeechCreatedEvent ¶
type SpeechCreatedEvent struct {
UserInitiated bool `json:"user_initiated"`
Source string `json:"source"`
SpeechHandle *SpeechHandle `json:"-"`
ParticipantID string `json:"participant_id,omitempty"`
CreatedAt time.Time `json:"created_at"`
}
func (*SpeechCreatedEvent) GetType ¶
func (e *SpeechCreatedEvent) GetType() string
type SpeechHandle ¶
type SpeechHandle struct {
ID string
AllowInterruptions bool
InputDetails InputDetails
Priority int
CreatedAt time.Time
FinalOutput any
ManualText string
OnItemAdded func(item llm.ChatItem)
RunResult RunResultInterface
// contains filtered or unexported fields
}
func NewSpeechHandle ¶
func NewSpeechHandle(allowInterruptions bool, inputDetails InputDetails) *SpeechHandle
func (*SpeechHandle) Error ¶ added in v0.0.5
func (s *SpeechHandle) Error() error
func (*SpeechHandle) Interrupt ¶
func (s *SpeechHandle) Interrupt(force bool) error
func (*SpeechHandle) IsDone ¶
func (s *SpeechHandle) IsDone() bool
func (*SpeechHandle) IsInterrupted ¶
func (s *SpeechHandle) IsInterrupted() bool
func (*SpeechHandle) IsScheduled ¶
func (s *SpeechHandle) IsScheduled() bool
func (*SpeechHandle) MarkDone ¶
func (s *SpeechHandle) MarkDone()
func (*SpeechHandle) MarkDoneWithError ¶ added in v0.0.5
func (s *SpeechHandle) MarkDoneWithError(err error)
func (*SpeechHandle) MarkScheduled ¶
func (s *SpeechHandle) MarkScheduled()
type StreamRequest ¶ added in v0.0.5
type StreamResponse ¶ added in v0.0.5
type SyncedAudioOutput ¶ added in v0.0.5
type SyncedAudioOutput struct {
// contains filtered or unexported fields
}
SyncedAudioOutput wraps an AudioOutput and pushes frames to the synchronizer.
func NewSyncedAudioOutput ¶ added in v0.0.5
func NewSyncedAudioOutput(sync *TranscriptSynchronizer, next AudioOutput) *SyncedAudioOutput
func (*SyncedAudioOutput) CaptureFrame ¶ added in v0.0.5
func (s *SyncedAudioOutput) CaptureFrame(frame *model.AudioFrame) error
func (*SyncedAudioOutput) ClearBuffer ¶ added in v0.0.5
func (s *SyncedAudioOutput) ClearBuffer()
func (*SyncedAudioOutput) Flush ¶ added in v0.0.5
func (s *SyncedAudioOutput) Flush()
func (*SyncedAudioOutput) Label ¶ added in v0.0.5
func (s *SyncedAudioOutput) Label() string
func (*SyncedAudioOutput) OnAttached ¶ added in v0.0.5
func (s *SyncedAudioOutput) OnAttached()
func (*SyncedAudioOutput) OnDetached ¶ added in v0.0.5
func (s *SyncedAudioOutput) OnDetached()
func (*SyncedAudioOutput) OnPlaybackFinished ¶ added in v0.0.5
func (s *SyncedAudioOutput) OnPlaybackFinished(f func(ev PlaybackFinishedEvent))
func (*SyncedAudioOutput) OnPlaybackStarted ¶ added in v0.0.5
func (s *SyncedAudioOutput) OnPlaybackStarted(f func(ev PlaybackStartedEvent))
func (*SyncedAudioOutput) Pause ¶ added in v0.0.5
func (s *SyncedAudioOutput) Pause()
func (*SyncedAudioOutput) Resume ¶ added in v0.0.5
func (s *SyncedAudioOutput) Resume()
func (*SyncedAudioOutput) WaitForPlayout ¶ added in v0.0.5
func (s *SyncedAudioOutput) WaitForPlayout(ctx context.Context) error
type SyncedTextOutput ¶ added in v0.0.5
type SyncedTextOutput struct {
// contains filtered or unexported fields
}
SyncedTextOutput wraps a TextOutput and pushes text to the synchronizer.
func NewSyncedTextOutput ¶ added in v0.0.5
func NewSyncedTextOutput(sync *TranscriptSynchronizer, next TextOutput) *SyncedTextOutput
func (*SyncedTextOutput) CaptureText ¶ added in v0.0.5
func (s *SyncedTextOutput) CaptureText(text string) error
func (*SyncedTextOutput) Flush ¶ added in v0.0.5
func (s *SyncedTextOutput) Flush()
func (*SyncedTextOutput) Label ¶ added in v0.0.5
func (s *SyncedTextOutput) Label() string
func (*SyncedTextOutput) OnAttached ¶ added in v0.0.5
func (s *SyncedTextOutput) OnAttached()
func (*SyncedTextOutput) OnDetached ¶ added in v0.0.5
func (s *SyncedTextOutput) OnDetached()
func (*SyncedTextOutput) SetSegmentID ¶ added in v0.0.5
func (s *SyncedTextOutput) SetSegmentID(id string)
type TTSGenerationData ¶
type TTSGenerationData struct {
AudioCh chan *model.AudioFrame
AlignedTextCh chan string
TTFB time.Duration
}
func PerformTTSInference ¶
type TTSNodeFunc ¶ added in v0.0.5
type Tagger ¶
type Tagger struct {
// contains filtered or unexported fields
}
func (*Tagger) Evaluation ¶
func (t *Tagger) Evaluation(result *EvaluationResult)
func (*Tagger) OutcomeReason ¶
type TextInput ¶ added in v0.0.5
type TextInput interface {
Label() string
OnAttached()
OnDetached()
}
TextInput represents a source of text (e.g., chat messages or remote text tracks).
type TextInputCallback ¶ added in v0.0.5
type TextInputCallback func(s *AgentSession, ev TextInputEvent) error
type TextInputEvent ¶ added in v0.0.5
type TextInputEvent struct {
Text string `json:"text"`
Participant lksdk.Participant `json:"-"`
}
type TextOutput ¶ added in v0.0.5
type TextOutput interface {
Label() string
CaptureText(text string) error
SetSegmentID(id string)
Flush()
OnAttached()
OnDetached()
}
TextOutput represents a destination for text (e.g., transcriptions).
type ToolExecutionOutput ¶
type ToolExecutionOutput struct {
FncCall llm.FunctionCall
FncCallOut *llm.FunctionCallOutput
RawOutput any
RawError error
ReplyRequired bool
AgentTask AgentInterface
}
type TranscriptSynchronizer ¶
type TranscriptSynchronizer struct {
// contains filtered or unexported fields
}
TranscriptSynchronizer drip-feeds text to match the playout speed of audio.
func NewTranscriptSynchronizer ¶
func NewTranscriptSynchronizer(speakingRate float64, refreshRate time.Duration) *TranscriptSynchronizer
NewTranscriptSynchronizer initializes the synchronizer. The default speaking rate is usually about 3.83 syllables per second.
func (*TranscriptSynchronizer) Close ¶
func (s *TranscriptSynchronizer) Close()
func (*TranscriptSynchronizer) EventCh ¶
func (s *TranscriptSynchronizer) EventCh() <-chan SyncEvent
func (*TranscriptSynchronizer) Interrupt ¶
func (s *TranscriptSynchronizer) Interrupt()
func (*TranscriptSynchronizer) PushAudio ¶
func (s *TranscriptSynchronizer) PushAudio(frame *model.AudioFrame)
func (*TranscriptSynchronizer) PushText ¶
func (s *TranscriptSynchronizer) PushText(text string)
func (*TranscriptSynchronizer) RotateSegment ¶ added in v0.0.5
func (s *TranscriptSynchronizer) RotateSegment()
RotateSegment flushes the remaining text buffer and resets the time accumulators for a new audio segment.
func (*TranscriptSynchronizer) SetSegmentID ¶ added in v0.0.5
func (s *TranscriptSynchronizer) SetSegmentID(id string)
type TranscriptionFilter ¶
type TranscriptionFilter struct {
SpeakingRate float64
}
func NewTranscriptionFilter ¶
func NewTranscriptionFilter() *TranscriptionFilter
type TranscriptionNodeFunc ¶ added in v0.0.5
type TransitionActivityAction ¶ added in v0.0.5
type TransitionActivityAction string
const ( TransitionActivityClose TransitionActivityAction = "close" TransitionActivityPause TransitionActivityAction = "pause" TransitionActivityStart TransitionActivityAction = "start" TransitionActivityResume TransitionActivityAction = "resume" )
type TurnDetectionMode ¶
type TurnDetectionMode string
const ( TurnDetectionModeSTT TurnDetectionMode = "stt" TurnDetectionModeVAD TurnDetectionMode = "vad" TurnDetectionModeRealtimeLLM TurnDetectionMode = "realtime_llm" TurnDetectionModeManual TurnDetectionMode = "manual" )
type TurnDetector ¶
type UpdateAgentOpts ¶ added in v0.0.5
type UpdateAgentOpts struct {
PreviousActivity TransitionActivityAction
NewActivity TransitionActivityAction
}
type UserInputTranscribedEvent ¶
type UserInputTranscribedEvent struct {
Transcript string `json:"transcript"`
IsFinal bool `json:"is_final"`
SpeakerID string `json:"speaker_id,omitempty"`
Language string `json:"language,omitempty"`
CreatedAt time.Time `json:"created_at"`
}
func (*UserInputTranscribedEvent) GetType ¶
func (e *UserInputTranscribedEvent) GetType() string
type UserStateChangedEvent ¶
type UserStateChangedEvent struct {
OldState UserState `json:"old_state"`
NewState UserState `json:"new_state"`
CreatedAt time.Time `json:"created_at"`
}
func (*UserStateChangedEvent) GetType ¶ added in v0.0.5
func (e *UserStateChangedEvent) GetType() string
type VideoGenerator ¶ added in v0.0.5
type VideoGenerator interface {
PushAudio(frame *model.AudioFrame) error
Stream() <-chan interface{} // Yields *model.AudioFrame, *model.VideoFrame, or *model.AudioSegmentEnd
ClearBuffer() error
Close() error
}
type VideoInput ¶ added in v0.0.5
type VideoInput interface {
Label() string
Stream() <-chan *model.VideoFrame
OnAttached()
OnDetached()
}
VideoInput represents a source of video frames (e.g., camera or remote track).
type VideoNodeFunc ¶ added in v0.0.5
type VideoNodeFunc func(ctx context.Context, video <-chan *model.VideoFrame) error
type VideoOutput ¶ added in v0.0.5
type VideoOutput interface {
Label() string
CaptureVideoFrame(frame *model.VideoFrame) error
Flush()
OnAttached()
OnDetached()
}
VideoOutput represents a destination for video frames (e.g., screen or remote track).
type VoiceActivityVideoSampler ¶
type VoiceActivityVideoSampler struct {
// contains filtered or unexported fields
}
VoiceActivityVideoSampler samples video frames at a reduced rate (e.g. 1 fps) only when the user is speaking, to reduce LLM context token usage.
func NewVoiceActivityVideoSampler ¶
func NewVoiceActivityVideoSampler(session *AgentSession, sampleRate float64, opts images.EncodeOptions) *VoiceActivityVideoSampler
func (*VoiceActivityVideoSampler) OnVideoFrame ¶
func (s *VoiceActivityVideoSampler) OnVideoFrame(ctx context.Context, frame *images.VideoFrame) bool
OnVideoFrame should be called for every incoming WebRTC video frame. It returns true if the frame should be forwarded to the LLM.
func (*VoiceActivityVideoSampler) SetSpeaking ¶
func (s *VoiceActivityVideoSampler) SetSpeaking(speaking bool)