agent

package

v0.0.5 (latest) · Published: Apr 22, 2026 · License: MIT · Imports: 40 · Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/cavos-io/rtp-agent

Links

Open Source Insights

Documentation ¶

Index ¶

Constants
func DefaultTextInputCallback(s *AgentSession, ev TextInputEvent) error
func GetOutput[T any](ctx context.Context, r *RunResult[T]) (T, error)
func PerformToolExecutions(ctx context.Context, functionCh <-chan *llm.FunctionToolCall, ...) <-chan ToolExecutionOutput
func RegisterPlugin(p Plugin)
func UploadSessionReport(cloudURL string, apiKey string, apiSecret string, agentName string, ...) error
func WithRunContext(ctx context.Context, rc *RunContext) context.Context
type AVSynchronizer
type Agent
- func NewAgent(instructions string) *Agent
- func (a *Agent) GetActivity() *AgentActivity
- func (a *Agent) GetAgent() *Agent
- func (a *Agent) OnEnter(ctx context.Context) error
- func (a *Agent) OnExit(ctx context.Context) error
- func (a *Agent) OnUserTurnCompleted(ctx context.Context, chatCtx *llm.ChatContext, newMsg *llm.ChatMessage) error
- func (a *Agent) Start(session *AgentSession, agentIntf AgentInterface) *AgentActivity
- func (a *Agent) UpdateInstructions(ctx context.Context, instructions string) error
- func (a *Agent) UpdateTools(ctx context.Context, tools []interface{}) error
type AgentActivity
- func NewAgentActivity(agentIntf AgentInterface, session *AgentSession, parentCtx context.Context) *AgentActivity
- func (a *AgentActivity) AClose()
- func (a *AgentActivity) CaptureVideoFrame(frame *model.VideoFrame) error
- func (a *AgentActivity) ClearUserTurn()
- func (a *AgentActivity) CommitUserTurn(opts *CommitUserTurnOpts)
- func (a *AgentActivity) Drain(ctx context.Context) error
- func (a *AgentActivity) Interrupt(force bool) error
- func (a *AgentActivity) OnEndOfSpeech(ev *vad.VADEvent)
- func (a *AgentActivity) OnFinalTranscript(ev *stt.SpeechEvent)
- func (a *AgentActivity) OnInterimTranscript(ev *stt.SpeechEvent)
- func (a *AgentActivity) OnStartOfSpeech(ev *vad.VADEvent)
- func (a *AgentActivity) Pause() error
- func (a *AgentActivity) PauseScheduling()
- func (a *AgentActivity) PushAudio(frame *model.AudioFrame) error
- func (a *AgentActivity) PushVideo(frame *model.VideoFrame) error
- func (a *AgentActivity) Resume() error
- func (a *AgentActivity) ResumeScheduling()
- func (a *AgentActivity) ScheduleSpeech(speech *SpeechHandle, priority int, force bool) error
- func (a *AgentActivity) Start()
- func (a *AgentActivity) Stop()
- func (a *AgentActivity) UpdateOptions(opts AgentSessionOptions)
type AgentEvent
- func NewAgentEvent(ev Event) *AgentEvent
- func (ae *AgentEvent) MarshalJSON() ([]byte, error)
- func (ae *AgentEvent) UnmarshalJSON(data []byte) error
type AgentFalseInterruptionEvent
- func (e *AgentFalseInterruptionEvent) GetType() string
type AgentHandoffEvent
- func (e *AgentHandoffEvent) GetType() string
type AgentHandoffRunEvent
- func (e *AgentHandoffRunEvent) GetCreatedAt() time.Time
- func (e *AgentHandoffRunEvent) GetItem() llm.ChatItem
- func (e *AgentHandoffRunEvent) RunEventType() string
type AgentInput
type AgentInterface
type AgentOutput
type AgentSession
- func NewAgentSession(agent AgentInterface, room *lksdk.Room, opts AgentSessionOptions) *AgentSession
- func (s *AgentSession) ClearUserTurn()
- func (s *AgentSession) Close() error
- func (s *AgentSession) CommitUserTurn(opts *CommitUserTurnOpts)
- func (s *AgentSession) GenerateReply(ctx context.Context, userInput string, allowInterruptions bool) (any, error)
- func (s *AgentSession) GetAgentTrackSID() string
- func (s *AgentSession) GetPublisher() interface{ ... }
- func (s *AgentSession) Interrupt(ctx context.Context) error
- func (s *AgentSession) OnAudioFrame(ctx context.Context, frame *model.AudioFrame)
- func (s *AgentSession) Pause() error
- func (s *AgentSession) PublishAgentTranscript(text string)
- func (s *AgentSession) PublishUserTranscript(text string)
- func (s *AgentSession) Resume() error
- func (s *AgentSession) Say(text string, allowInterruptions bool) (*SpeechHandle, error)
- func (s *AgentSession) SetAgentTrackSID(sid string)
- func (s *AgentSession) SetAudioOutput(out AudioOutput)
- func (s *AgentSession) SetRemoteTrackSID(sid string)
- func (s *AgentSession) SetRemoteUserIdentity(identity string)
- func (s *AgentSession) SetRoom(room *lksdk.Room)
- func (s *AgentSession) SetVideoOutput(out VideoOutput)
- func (s *AgentSession) Start(ctx context.Context) error
- func (s *AgentSession) Stop(ctx context.Context) error
- func (s *AgentSession) TimelineSnapshot() []*AgentEvent
- func (s *AgentSession) UpdateAgent(agent AgentInterface, opts *UpdateAgentOpts) error
- func (s *AgentSession) UpdateAgentState(state AgentState)
- func (s *AgentSession) UpdateOptions(opts AgentSessionOptions)
- func (s *AgentSession) UpdateUserState(state UserState)
type AgentSessionOptions
type AgentState
type AgentStateChangedEvent
- func (e *AgentStateChangedEvent) GetType() string
type AgentTask
- func NewAgentTask[T any](instructions string) *AgentTask[T]
- func (t *AgentTask[T]) Complete(result T)
- func (t *AgentTask[T]) Fail(err error)
- func (t *AgentTask[T]) WaitAny(ctx context.Context) (any, error)
type AudioConfig
type AudioInput
type AudioOutput
type AudioReceiver
type AudioRecognition
- func NewAudioRecognition(session *AgentSession, hooks RecognitionHooks, s stt.STT, v vad.VAD) *AudioRecognition
- func (ar *AudioRecognition) Close()
- func (ar *AudioRecognition) Flush() error
- func (ar *AudioRecognition) PushAudio(frame *model.AudioFrame) error
- func (ar *AudioRecognition) Start(ctx context.Context) error
type AudioSource
type AvatarIO
type AvatarOptions
type AvatarRunner
- func NewAvatarRunner(room *lksdk.Room, audioRecv AudioReceiver, videoGen VideoGenerator, ...) *AvatarRunner
- func (r *AvatarRunner) SendLipSyncEvent(ctx context.Context, data []byte) error
- func (r *AvatarRunner) Start(ctx context.Context) error
- func (r *AvatarRunner) Stop()
type BackgroundAudioPlayer
- func NewBackgroundAudioPlayer(ambientSound, thinkingSound interface{}) *BackgroundAudioPlayer
- func (p *BackgroundAudioPlayer) AgentStateChanged(newState AgentState)
- func (p *BackgroundAudioPlayer) Close() error
- func (p *BackgroundAudioPlayer) Play(audio interface{}, loop bool) *PlayHandle
- func (p *BackgroundAudioPlayer) Start(room *lksdk.Room, agentSession *AgentSession) error
type BuiltinAudioClip
- func (b BuiltinAudioClip) Path() string
type ChatMessageRunEvent
- func (e *ChatMessageRunEvent) GetCreatedAt() time.Time
- func (e *ChatMessageRunEvent) GetItem() llm.ChatItem
- func (e *ChatMessageRunEvent) RunEventType() string
type ClientEventPayload
type ClientEventsDispatcher
- func NewClientEventsDispatcher(room *lksdk.Room, session *AgentSession) *ClientEventsDispatcher
- func (d *ClientEventsDispatcher) Close()
- func (d *ClientEventsDispatcher) DispatchAgentState(state AgentState)
- func (d *ClientEventsDispatcher) DispatchUserState(state UserState)
- func (d *ClientEventsDispatcher) RegisterTextInput(cb TextInputCallback)
type CloseEvent
- func (e *CloseEvent) GetType() string
type CloseReason
type CommitUserTurnOpts
type ConversationItemAddedEvent
- func (e *ConversationItemAddedEvent) GetType() string
type DataStreamIO
- func NewDataStreamIO(room *lksdk.Room) *DataStreamIO
- func (io *DataStreamIO) SendAvatarData(ctx context.Context, data []byte) error
type DtmfEvent
type EndOfTurnInfo
type ErrorEvent
- func (e *ErrorEvent) GetType() string
type EvaluationResult
type Event
type EventTimeline
- func NewEventTimeline() *EventTimeline
- func (t *EventTimeline) AddEvent(ev Event)
- func (t *EventTimeline) Clear()
- func (t *EventTimeline) Snapshot() []*AgentEvent
type FunctionCallOutputRunEvent
- func (e *FunctionCallOutputRunEvent) GetCreatedAt() time.Time
- func (e *FunctionCallOutputRunEvent) GetItem() llm.ChatItem
- func (e *FunctionCallOutputRunEvent) RunEventType() string
type FunctionCallRunEvent
- func (e *FunctionCallRunEvent) GetCreatedAt() time.Time
- func (e *FunctionCallRunEvent) GetItem() llm.ChatItem
- func (e *FunctionCallRunEvent) RunEventType() string
type FunctionToolsExecutedEvent
- func (e *FunctionToolsExecutedEvent) GetType() string
type GenerateReplyOpts
type GetAgentInfoResponse
type GetChatHistoryResponse
type GetSessionStateResponse
type IVRActivity
- func NewIVRActivity(agentIntf AgentInterface) *IVRActivity
- func (i *IVRActivity) OnDtmf(digit string)
- func (i *IVRActivity) SetDigitCallback(timeout time.Duration, cb func(buffer string) (bool, error))
- func (i *IVRActivity) Start()
- func (i *IVRActivity) Stop()
type InputDetails
- func DefaultInputDetails() InputDetails
type JobResult
type LLMGenerationData
- func PerformLLMInference(ctx context.Context, l llm.LLM, chatCtx *llm.ChatContext, tools []interface{}) (*LLMGenerationData, error)
type LLMNodeFunc
type LLMTurnDetector
- func NewLLMTurnDetector(llmInstance llm.LLM) *LLMTurnDetector
- func (m *LLMTurnDetector) PredictEndOfTurn(ctx context.Context, chatCtx *llm.ChatContext) (float64, error)
type MediaPublisher
type MetricsCollectedEvent
- func (e *MetricsCollectedEvent) GetType() string
type MultimodalAgent
- func NewMultimodalAgent(m llm.RealtimeModel, chatCtx *llm.ChatContext) *MultimodalAgent
- func (ma *MultimodalAgent) OnAudioFrame(ctx context.Context, frame *model.AudioFrame)
- func (ma *MultimodalAgent) Start(ctx context.Context, s *AgentSession) error
type ParticipantActiveEvent
- func (e *ParticipantActiveEvent) GetType() string
type ParticipantReference
type PipelineAgent
- func NewPipelineAgent(vad vad.VAD, sttInstance stt.STT, llmObj llm.LLM, ttsInstance tts.TTS, ...) *PipelineAgent
- func (va *PipelineAgent) GenerateReply(speech *SpeechHandle)
- func (va *PipelineAgent) OnAudioFrame(ctx context.Context, frame *model.AudioFrame)
- func (va *PipelineAgent) Start(ctx context.Context, s *AgentSession) error
- func (va *PipelineAgent) Stop()
type PlayHandle
- func (h *PlayHandle) Done() bool
- func (h *PlayHandle) Stop()
- func (h *PlayHandle) WaitForPlayout()
type PlaybackFinishedEvent
type PlaybackStartedEvent
type Plugin
- func RegisteredPlugins() []Plugin
type QueueIO
- func NewQueueIO() *QueueIO
- func (io *QueueIO) ReadQueue() <-chan []byte
- func (io *QueueIO) SendAvatarData(ctx context.Context, data []byte) error
type RealtimeAudioOutputNodeFunc
type RecognitionHooks
type RecordingOptions
type RunAssert
- func (a *RunAssert) ContainsMessage(role llm.ChatRole, content string) *RunAssert
- func (a *RunAssert) HasError() error
- func (a *RunAssert) IsFunctionCall(name string) *RunAssert
- func (a *RunAssert) Judge(ctx context.Context, evaluator evals.Evaluator, llmInstance llm.LLM) (*RunAssert, error)
type RunContext
- func GetRunContext(ctx context.Context) *RunContext
- func (r *RunContext) WaitForPlayout(ctx context.Context) error
type RunEvent
type RunResult
- func GenerateTypedReply[T any](ctx context.Context, s *AgentSession, userInput string, ...) (*RunResult[T], error)
- func NewRunResult[T any](chatCtx *llm.ChatContext) *RunResult[T]
- func (r *RunResult[T]) AddEvent(ev RunEvent)
- func (r *RunResult[T]) Done() <-chan struct{}
- func (r *RunResult[T]) Eval(ctx context.Context, evaluator evals.Evaluator, llmInstance llm.LLM) (*evals.JudgmentResult, error)
- func (r *RunResult[T]) GetEvents() []RunEvent
- func (r *RunResult[T]) Wait(ctx context.Context) error
- func (r *RunResult[T]) WaitAny(ctx context.Context) (T, error)
- func (r *RunResult[T]) WatchHandle(ctx context.Context, handle *SpeechHandle)
- func (r *RunResult[T]) WatchTask(done <-chan struct{})
type RunResultInterface
type STTNodeFunc
type SendMessageRequest
type SendMessageResponse
type SessionInfo
type SessionReport
- func NewSessionReport() *SessionReport
- func (r *SessionReport) AddEvent(event any)
- func (r *SessionReport) SetChatHistory(chatCtx *llm.ChatContext)
- func (r *SessionReport) SetTimeline(events []*AgentEvent)
- func (r *SessionReport) ToDict() map[string]any
type SpeechCreatedEvent
- func (e *SpeechCreatedEvent) GetType() string
type SpeechHandle
- func NewSpeechHandle(allowInterruptions bool, inputDetails InputDetails) *SpeechHandle
- func (s *SpeechHandle) Error() error
- func (s *SpeechHandle) Interrupt(force bool) error
- func (s *SpeechHandle) IsDone() bool
- func (s *SpeechHandle) IsInterrupted() bool
- func (s *SpeechHandle) IsScheduled() bool
- func (s *SpeechHandle) MarkDone()
- func (s *SpeechHandle) MarkDoneWithError(err error)
- func (s *SpeechHandle) MarkScheduled()
- func (s *SpeechHandle) Wait(ctx context.Context) error
type StreamRequest
type StreamResponse
type SyncEvent
type SyncedAudioOutput
- func NewSyncedAudioOutput(sync *TranscriptSynchronizer, next AudioOutput) *SyncedAudioOutput
- func (s *SyncedAudioOutput) CaptureFrame(frame *model.AudioFrame) error
- func (s *SyncedAudioOutput) ClearBuffer()
- func (s *SyncedAudioOutput) Flush()
- func (s *SyncedAudioOutput) Label() string
- func (s *SyncedAudioOutput) OnAttached()
- func (s *SyncedAudioOutput) OnDetached()
- func (s *SyncedAudioOutput) OnPlaybackFinished(f func(ev PlaybackFinishedEvent))
- func (s *SyncedAudioOutput) OnPlaybackStarted(f func(ev PlaybackStartedEvent))
- func (s *SyncedAudioOutput) Pause()
- func (s *SyncedAudioOutput) Resume()
- func (s *SyncedAudioOutput) WaitForPlayout(ctx context.Context) error
type SyncedTextOutput
- func NewSyncedTextOutput(sync *TranscriptSynchronizer, next TextOutput) *SyncedTextOutput
- func (s *SyncedTextOutput) CaptureText(text string) error
- func (s *SyncedTextOutput) Flush()
- func (s *SyncedTextOutput) Label() string
- func (s *SyncedTextOutput) OnAttached()
- func (s *SyncedTextOutput) OnDetached()
- func (s *SyncedTextOutput) SetSegmentID(id string)
type TTSGenerationData
- func PerformTTSInference(ctx context.Context, t tts.TTS, textCh <-chan string) (*TTSGenerationData, error)
type TTSNodeFunc
type Tagger
- func NewTagger() *Tagger
- func (t *Tagger) Add(tag string)
- func (t *Tagger) Evaluation(result *EvaluationResult)
- func (t *Tagger) Fail(reason string)
- func (t *Tagger) OutcomeReason() string
- func (t *Tagger) Remove(tag string)
- func (t *Tagger) Success(reason string)
- func (t *Tagger) Tags() []string
type TaskWaiter
type TextInput
type TextInputCallback
type TextInputEvent
type TextOutput
type ToolExecutionOutput
type TranscriptSynchronizer
- func NewTranscriptSynchronizer(speakingRate float64, refreshRate time.Duration) *TranscriptSynchronizer
- func (s *TranscriptSynchronizer) Close()
- func (s *TranscriptSynchronizer) EventCh() <-chan SyncEvent
- func (s *TranscriptSynchronizer) Interrupt()
- func (s *TranscriptSynchronizer) PushAudio(frame *model.AudioFrame)
- func (s *TranscriptSynchronizer) PushText(text string)
- func (s *TranscriptSynchronizer) RotateSegment()
- func (s *TranscriptSynchronizer) SetSegmentID(id string)
type TranscriptionFilter
- func NewTranscriptionFilter() *TranscriptionFilter
type TranscriptionNodeFunc
type TransitionActivityAction
type TurnDetectionMode
type TurnDetector
type UpdateAgentOpts
type UserInputTranscribedEvent
- func (e *UserInputTranscribedEvent) GetType() string
type UserState
type UserStateChangedEvent
- func (e *UserStateChangedEvent) GetType() string
type VideoGenerator
type VideoInput
type VideoNodeFunc
type VideoOutput
type VoiceActivityVideoSampler
- func NewVoiceActivityVideoSampler(session *AgentSession, sampleRate float64, opts images.EncodeOptions) *VoiceActivityVideoSampler
- func (s *VoiceActivityVideoSampler) OnVideoFrame(ctx context.Context, frame *images.VideoFrame) bool
- func (s *VoiceActivityVideoSampler) SetSpeaking(speaking bool)

Constants ¶

View Source

const (
	TopicAgentRequest  = "lk.agent.request"
	TopicAgentResponse = "lk.agent.response"
	TopicChat          = "lk.chat"
)

View Source

const (
	SpeechPriorityLow    = 0
	SpeechPriorityNormal = 5
	SpeechPriorityHigh   = 10
	InterruptionTimeout  = 5 * time.Second
)

View Source

const TopicClientEvents = "lk-agent-client-events"

Variables ¶

This section is empty.

Functions ¶

func DefaultTextInputCallback ¶ added in v0.0.5

func DefaultTextInputCallback(s *AgentSession, ev TextInputEvent) error

func GetOutput ¶ added in v0.0.5

func GetOutput[T any](ctx context.Context, r *RunResult[T]) (T, error)

GetOutput returns the strictly typed final output of the run, blocking until completion.

func PerformToolExecutions ¶

func PerformToolExecutions(
	ctx context.Context,
	functionCh <-chan *llm.FunctionToolCall,
	toolCtx *llm.ToolContext,
) <-chan ToolExecutionOutput

func RegisterPlugin ¶

func RegisterPlugin(p Plugin)

func UploadSessionReport ¶

func UploadSessionReport(
	cloudURL string,
	apiKey string,
	apiSecret string,
	agentName string,
	report *SessionReport,
) error

func WithRunContext ¶

func WithRunContext(ctx context.Context, rc *RunContext) context.Context

Types ¶

type AVSynchronizer ¶ added in v0.0.5

type AVSynchronizer interface {
	Push(frame interface{}) error
	Close() error
}

type Agent ¶

type Agent struct {
	ID           string
	Instructions string
	ChatCtx      *llm.ChatContext
	Tools        []interface{}

	TurnDetection TurnDetectionMode
	TurnDetector  TurnDetector
	STT           stt.STT
	VAD           vad.VAD
	LLM           llm.LLM
	TTS           tts.TTS

	LLMNode                 LLMNodeFunc
	TTSNode                 TTSNodeFunc
	STTNode                 STTNodeFunc
	VideoNode               VideoNodeFunc
	TranscriptionNode       TranscriptionNodeFunc
	RealtimeAudioOutputNode RealtimeAudioOutputNodeFunc

	AllowInterruptions        bool
	MinConsecutiveSpeechDelay float64
	UseTTSAlignedTranscript   bool
	MinEndpointingDelay       float64
	MaxEndpointingDelay       float64
	// contains filtered or unexported fields
}

func NewAgent ¶

func NewAgent(instructions string) *Agent

func (*Agent) GetActivity ¶

func (a *Agent) GetActivity() *AgentActivity

func (*Agent) GetAgent ¶

func (a *Agent) GetAgent() *Agent

func (*Agent) OnEnter ¶

func (a *Agent) OnEnter(ctx context.Context) error

func (*Agent) OnExit ¶

func (a *Agent) OnExit(ctx context.Context) error

func (*Agent) OnUserTurnCompleted ¶

func (a *Agent) OnUserTurnCompleted(ctx context.Context, chatCtx *llm.ChatContext, newMsg *llm.ChatMessage) error

func (*Agent) Start ¶

func (a *Agent) Start(session *AgentSession, agentIntf AgentInterface) *AgentActivity

func (*Agent) UpdateInstructions ¶

func (a *Agent) UpdateInstructions(ctx context.Context, instructions string) error

func (*Agent) UpdateTools ¶

func (a *Agent) UpdateTools(ctx context.Context, tools []interface{}) error

type AgentActivity ¶

type AgentActivity struct {
	AgentIntf AgentInterface
	Agent     *Agent
	Session   *AgentSession
	// contains filtered or unexported fields
}

AgentActivity handles the internal event loops, I/O processing, and speech generation queue for an Agent.

func NewAgentActivity ¶

func NewAgentActivity(agentIntf AgentInterface, session *AgentSession, parentCtx context.Context) *AgentActivity

func (*AgentActivity) AClose ¶ added in v0.0.5

func (a *AgentActivity) AClose()

func (*AgentActivity) CaptureVideoFrame ¶ added in v0.0.5

func (a *AgentActivity) CaptureVideoFrame(frame *model.VideoFrame) error

func (*AgentActivity) ClearUserTurn ¶ added in v0.0.5

func (a *AgentActivity) ClearUserTurn()

func (*AgentActivity) CommitUserTurn ¶ added in v0.0.5

func (a *AgentActivity) CommitUserTurn(opts *CommitUserTurnOpts)

func (*AgentActivity) Drain ¶ added in v0.0.5

func (a *AgentActivity) Drain(ctx context.Context) error

func (*AgentActivity) Interrupt ¶ added in v0.0.5

func (a *AgentActivity) Interrupt(force bool) error

func (*AgentActivity) OnEndOfSpeech ¶

func (a *AgentActivity) OnEndOfSpeech(ev *vad.VADEvent)

func (*AgentActivity) OnFinalTranscript ¶

func (a *AgentActivity) OnFinalTranscript(ev *stt.SpeechEvent)

func (*AgentActivity) OnInterimTranscript ¶ added in v0.0.5

func (a *AgentActivity) OnInterimTranscript(ev *stt.SpeechEvent)

func (*AgentActivity) OnStartOfSpeech ¶

func (a *AgentActivity) OnStartOfSpeech(ev *vad.VADEvent)

OnStartOfSpeech is one of the event callbacks from RecognitionHooks.

func (*AgentActivity) Pause ¶ added in v0.0.5

func (a *AgentActivity) Pause() error

func (*AgentActivity) PauseScheduling ¶ added in v0.0.5

func (a *AgentActivity) PauseScheduling()

func (*AgentActivity) PushAudio ¶ added in v0.0.5

func (a *AgentActivity) PushAudio(frame *model.AudioFrame) error

func (*AgentActivity) PushVideo ¶ added in v0.0.5

func (a *AgentActivity) PushVideo(frame *model.VideoFrame) error

func (*AgentActivity) Resume ¶ added in v0.0.5

func (a *AgentActivity) Resume() error

func (*AgentActivity) ResumeScheduling ¶ added in v0.0.5

func (a *AgentActivity) ResumeScheduling()

func (*AgentActivity) ScheduleSpeech ¶

func (a *AgentActivity) ScheduleSpeech(speech *SpeechHandle, priority int, force bool) error

func (*AgentActivity) Start ¶

func (a *AgentActivity) Start()

func (*AgentActivity) Stop ¶

func (a *AgentActivity) Stop()

func (*AgentActivity) UpdateOptions ¶ added in v0.0.5

func (a *AgentActivity) UpdateOptions(opts AgentSessionOptions)

type AgentEvent ¶ added in v0.0.5

type AgentEvent struct {
	Type      string  `json:"type"`
	Timestamp float64 `json:"timestamp"`

	UserStateChanged       *UserStateChangedEvent       `json:"user_state_changed,omitempty"`
	AgentStateChanged      *AgentStateChangedEvent      `json:"agent_state_changed,omitempty"`
	UserInputTranscribed   *UserInputTranscribedEvent   `json:"user_input_transcribed,omitempty"`
	AgentFalseInterruption *AgentFalseInterruptionEvent `json:"agent_false_interruption,omitempty"`
	MetricsCollected       *MetricsCollectedEvent       `json:"metrics_collected,omitempty"`
	ConversationItemAdded  *ConversationItemAddedEvent  `json:"conversation_item_added,omitempty"`
	FunctionToolsExecuted  *FunctionToolsExecutedEvent  `json:"function_tools_executed,omitempty"`
	AgentHandoff           *AgentHandoffEvent           `json:"agent_handoff,omitempty"`
	SpeechCreated          *SpeechCreatedEvent          `json:"speech_created,omitempty"`
	Error                  *ErrorEvent                  `json:"error,omitempty"`
	Close                  *CloseEvent                  `json:"close,omitempty"`
	ParticipantActive      *ParticipantActiveEvent      `json:"participant_active,omitempty"`
}

func NewAgentEvent ¶ added in v0.0.5

func NewAgentEvent(ev Event) *AgentEvent

func (*AgentEvent) MarshalJSON ¶ added in v0.0.5

func (ae *AgentEvent) MarshalJSON() ([]byte, error)

func (*AgentEvent) UnmarshalJSON ¶ added in v0.0.5

func (ae *AgentEvent) UnmarshalJSON(data []byte) error

type AgentFalseInterruptionEvent ¶ added in v0.0.5

type AgentFalseInterruptionEvent struct {
	Resumed   bool      `json:"resumed"`
	CreatedAt time.Time `json:"created_at"`
}

func (*AgentFalseInterruptionEvent) GetType ¶ added in v0.0.5

func (e *AgentFalseInterruptionEvent) GetType() string

type AgentHandoffEvent ¶ added in v0.0.5

type AgentHandoffEvent struct {
	OldAgent   AgentInterface    `json:"-"`
	NewAgent   AgentInterface    `json:"-"`
	OldAgentID string            `json:"old_agent_id"`
	NewAgentID string            `json:"new_agent_id"`
	Handoff    *llm.AgentHandoff `json:"handoff"`
	CreatedAt  time.Time         `json:"created_at"`
}

func (*AgentHandoffEvent) GetType ¶ added in v0.0.5

func (e *AgentHandoffEvent) GetType() string

type AgentHandoffRunEvent ¶ added in v0.0.5

type AgentHandoffRunEvent struct {
	Item     *llm.AgentHandoff
	OldAgent AgentInterface
	NewAgent AgentInterface
}

func (*AgentHandoffRunEvent) GetCreatedAt ¶ added in v0.0.5

func (e *AgentHandoffRunEvent) GetCreatedAt() time.Time

func (*AgentHandoffRunEvent) GetItem ¶ added in v0.0.5

func (e *AgentHandoffRunEvent) GetItem() llm.ChatItem

func (*AgentHandoffRunEvent) RunEventType ¶ added in v0.0.5

func (e *AgentHandoffRunEvent) RunEventType() string

type AgentInput ¶ added in v0.0.5

type AgentInput struct {
	Audio AudioInput
	Text  TextInput
	Video VideoInput
}

type AgentInterface ¶

type AgentInterface interface {
	OnEnter(ctx context.Context) error
	OnExit(ctx context.Context) error
	OnUserTurnCompleted(ctx context.Context, chatCtx *llm.ChatContext, newMsg *llm.ChatMessage) error
	GetAgent() *Agent
	GetActivity() *AgentActivity
}

type AgentOutput ¶ added in v0.0.5

type AgentOutput struct {
	Audio         AudioOutput
	Transcription TextOutput
	Video         VideoOutput
	Publisher     MediaPublisher
}

type AgentSession ¶

type AgentSession struct {
	Options AgentSessionOptions

	ChatCtx   *llm.ChatContext
	Agent     AgentInterface
	STT       stt.STT
	VAD       vad.VAD
	LLM       llm.LLM
	TTS       tts.TTS
	Tools     []interface{}
	Assistant *PipelineAgent
	Room      *lksdk.Room

	Input  AgentInput
	Output AgentOutput

	MetricsCollector *telemetry.UsageCollector
	Timeline         *EventTimeline

	UserState  UserState
	AgentState AgentState

	// Transcript attribution — set by RoomIO when tracks are established.
	RemoteUserIdentity string
	RemoteTrackSID     string
	AgentTrackSID      string

	Activity *AgentActivity

	// Event channels
	AgentStateChangedCh chan AgentStateChangedEvent
	UserStateChangedCh  chan UserStateChangedEvent
	// contains filtered or unexported fields
}

func NewAgentSession ¶

func NewAgentSession(agent AgentInterface, room *lksdk.Room, opts AgentSessionOptions) *AgentSession

func (*AgentSession) ClearUserTurn ¶ added in v0.0.5

func (s *AgentSession) ClearUserTurn()

func (*AgentSession) Close ¶ added in v0.0.5

func (s *AgentSession) Close() error

func (*AgentSession) CommitUserTurn ¶ added in v0.0.5

func (s *AgentSession) CommitUserTurn(opts *CommitUserTurnOpts)

func (*AgentSession) GenerateReply ¶

func (s *AgentSession) GenerateReply(ctx context.Context, userInput string, allowInterruptions bool) (any, error)

func (*AgentSession) GetAgentTrackSID ¶

func (s *AgentSession) GetAgentTrackSID() string

func (*AgentSession) GetPublisher ¶ added in v0.0.5

func (s *AgentSession) GetPublisher() interface {
	Identity() string
	PublishData(data []byte, topic string, destinationSIDs []string) error
}

func (*AgentSession) Interrupt ¶ added in v0.0.5

func (s *AgentSession) Interrupt(ctx context.Context) error

func (*AgentSession) OnAudioFrame ¶

func (s *AgentSession) OnAudioFrame(ctx context.Context, frame *model.AudioFrame)

func (*AgentSession) Pause ¶ added in v0.0.5

func (s *AgentSession) Pause() error

func (*AgentSession) PublishAgentTranscript ¶

func (s *AgentSession) PublishAgentTranscript(text string)

PublishAgentTranscript publishes the agent's LLM response to the Playground. It sends both a ChatMessage (chat panel) and a Transcription packet (transcript overlay).

func (*AgentSession) PublishUserTranscript ¶

func (s *AgentSession) PublishUserTranscript(text string)

PublishUserTranscript publishes the user's STT transcript to the Playground. It sends both a ChatMessage (chat panel) and a Transcription packet (transcript overlay).

func (*AgentSession) Resume ¶ added in v0.0.5

func (s *AgentSession) Resume() error

func (*AgentSession) Say ¶ added in v0.0.5

func (s *AgentSession) Say(text string, allowInterruptions bool) (*SpeechHandle, error)

func (*AgentSession) SetAgentTrackSID ¶

func (s *AgentSession) SetAgentTrackSID(sid string)

func (*AgentSession) SetAudioOutput ¶ added in v0.0.5

func (s *AgentSession) SetAudioOutput(out AudioOutput)

func (*AgentSession) SetRemoteTrackSID ¶

func (s *AgentSession) SetRemoteTrackSID(sid string)

func (*AgentSession) SetRemoteUserIdentity ¶

func (s *AgentSession) SetRemoteUserIdentity(identity string)

func (*AgentSession) SetRoom ¶ added in v0.0.5

func (s *AgentSession) SetRoom(room *lksdk.Room)

SetRoom wires the LiveKit room to the session after connection. This initialises the ClientEventsDispatcher (RPC handlers, state broadcasting) so the Playground can discover the agent's audio track and state.

func (*AgentSession) SetVideoOutput ¶ added in v0.0.5

func (s *AgentSession) SetVideoOutput(out VideoOutput)

func (*AgentSession) Start ¶

func (s *AgentSession) Start(ctx context.Context) error

func (*AgentSession) Stop ¶

func (s *AgentSession) Stop(ctx context.Context) error

func (*AgentSession) TimelineSnapshot ¶ added in v0.0.5

func (s *AgentSession) TimelineSnapshot() []*AgentEvent

func (*AgentSession) UpdateAgent ¶ added in v0.0.5

func (s *AgentSession) UpdateAgent(agent AgentInterface, opts *UpdateAgentOpts) error

func (*AgentSession) UpdateAgentState ¶

func (s *AgentSession) UpdateAgentState(state AgentState)

func (*AgentSession) UpdateOptions ¶ added in v0.0.5

func (s *AgentSession) UpdateOptions(opts AgentSessionOptions)

func (*AgentSession) UpdateUserState ¶

func (s *AgentSession) UpdateUserState(state UserState)

type AgentSessionOptions ¶

type AgentSessionOptions struct {
	AllowInterruptions            bool
	DiscardAudioIfUninterruptible bool
	MinInterruptionDuration       float64
	MinInterruptionWords          int
	MinEndpointingDelay           float64
	MaxEndpointingDelay           float64
	MaxToolSteps                  int
	UserAwayTimeout               float64
	FalseInterruptionTimeout      float64
	ResumeFalseInterruption       bool
	MinConsecutiveSpeechDelay     float64
	UseTTSAlignedTranscript       bool
	PreemptiveGeneration          bool
	AECWarmupDuration             float64
	SpeakingRate                  float64
	TranscriptRefreshRate         time.Duration
	LinkedParticipant             lksdk.Participant
	IVRDetection                  bool
}

type AgentState ¶

type AgentState string

const (
	AgentStateInitializing AgentState = "initializing"
	AgentStateIdle         AgentState = "idle"
	AgentStateListening    AgentState = "listening"
	AgentStateThinking     AgentState = "thinking"
	AgentStateSpeaking     AgentState = "speaking"
)

type AgentStateChangedEvent ¶

type AgentStateChangedEvent struct {
	OldState  AgentState `json:"old_state"`
	NewState  AgentState `json:"new_state"`
	CreatedAt time.Time  `json:"created_at"`
}

func (*AgentStateChangedEvent) GetType ¶ added in v0.0.5

func (e *AgentStateChangedEvent) GetType() string

type AgentTask ¶

type AgentTask[T any] struct {
	Agent
	Result chan T
	Err    chan error
}

AgentTask represents a sub-agent execution that returns a result.

func NewAgentTask ¶

func NewAgentTask[T any](instructions string) *AgentTask[T]

func (*AgentTask[T]) Complete ¶

func (t *AgentTask[T]) Complete(result T)

func (*AgentTask[T]) Fail ¶

func (t *AgentTask[T]) Fail(err error)

func (*AgentTask[T]) WaitAny ¶

func (t *AgentTask[T]) WaitAny(ctx context.Context) (any, error)

type AudioConfig ¶

type AudioConfig struct {
	Source      AudioSource
	Volume      float64
	Probability float64
}

type AudioInput ¶ added in v0.0.5

type AudioInput interface {
	Label() string
	Stream() <-chan *model.AudioFrame
	OnAttached()
	OnDetached()
}

AudioInput represents a source of audio frames (e.g., mic or remote track).

type AudioOutput ¶ added in v0.0.5

type AudioOutput interface {
	Label() string
	CaptureFrame(frame *model.AudioFrame) error
	Flush()
	WaitForPlayout(ctx context.Context) error
	ClearBuffer()
	OnAttached()
	OnDetached()
	Pause()
	Resume()
	OnPlaybackStarted(func(ev PlaybackStartedEvent))
	OnPlaybackFinished(func(ev PlaybackFinishedEvent))
}

AudioOutput represents a destination for audio frames (e.g., speakers or remote track).

type AudioReceiver ¶ added in v0.0.5

type AudioReceiver interface {
	Start(ctx context.Context) error
	Stream() <-chan *model.AudioFrame
	NotifyPlaybackFinished(playbackPosition time.Duration, interrupted bool) error
	Close() error
}

type AudioRecognition ¶

type AudioRecognition struct {
	// contains filtered or unexported fields
}

func NewAudioRecognition ¶

func NewAudioRecognition(session *AgentSession, hooks RecognitionHooks, s stt.STT, v vad.VAD) *AudioRecognition

func (*AudioRecognition) Close ¶ added in v0.0.5

func (ar *AudioRecognition) Close()

func (*AudioRecognition) Flush ¶ added in v0.0.5

func (ar *AudioRecognition) Flush() error

func (*AudioRecognition) PushAudio ¶

func (ar *AudioRecognition) PushAudio(frame *model.AudioFrame) error

func (*AudioRecognition) Start ¶

func (ar *AudioRecognition) Start(ctx context.Context) error

type AudioSource ¶

type AudioSource interface{} // Can be string, BuiltinAudioClip, or <-chan *model.AudioFrame

type AvatarIO ¶

type AvatarIO interface {
	SendAvatarData(ctx context.Context, data []byte) error
}

AvatarIO defines how Avatar commands/data are sent.

type AvatarOptions ¶ added in v0.0.5

type AvatarOptions struct {
	VideoWidth      int
	VideoHeight     int
	VideoFPS        float64
	AudioSampleRate int
	AudioChannels   int
}

type AvatarRunner ¶

type AvatarRunner struct {
	// contains filtered or unexported fields
}

AvatarRunner coordinates Avatar IO and LipSync events.

func NewAvatarRunner ¶

func NewAvatarRunner(room *lksdk.Room, audioRecv AudioReceiver, videoGen VideoGenerator, opts AvatarOptions, avSync AVSynchronizer, lazyPublish bool) *AvatarRunner

func (*AvatarRunner) SendLipSyncEvent ¶ added in v0.0.5

func (r *AvatarRunner) SendLipSyncEvent(ctx context.Context, data []byte) error

func (*AvatarRunner) Start ¶

func (r *AvatarRunner) Start(ctx context.Context) error

func (*AvatarRunner) Stop ¶

func (r *AvatarRunner) Stop()

type BackgroundAudioPlayer ¶

type BackgroundAudioPlayer struct {
	// contains filtered or unexported fields
}

func NewBackgroundAudioPlayer ¶

func NewBackgroundAudioPlayer(ambientSound, thinkingSound interface{}) *BackgroundAudioPlayer

func (*BackgroundAudioPlayer) AgentStateChanged ¶

func (p *BackgroundAudioPlayer) AgentStateChanged(newState AgentState)

func (*BackgroundAudioPlayer) Close ¶

func (p *BackgroundAudioPlayer) Close() error

func (*BackgroundAudioPlayer) Play ¶

func (p *BackgroundAudioPlayer) Play(audio interface{}, loop bool) *PlayHandle

func (*BackgroundAudioPlayer) Start ¶

func (p *BackgroundAudioPlayer) Start(room *lksdk.Room, agentSession *AgentSession) error

type BuiltinAudioClip ¶

type BuiltinAudioClip string

const (
	CityAmbience    BuiltinAudioClip = "city-ambience.ogg"
	ForestAmbience  BuiltinAudioClip = "forest-ambience.ogg"
	OfficeAmbience  BuiltinAudioClip = "office-ambience.ogg"
	CrowdedRoom     BuiltinAudioClip = "crowded-room.ogg"
	KeyboardTyping  BuiltinAudioClip = "keyboard-typing.ogg"
	KeyboardTyping2 BuiltinAudioClip = "keyboard-typing2.ogg"
	HoldMusic       BuiltinAudioClip = "hold_music.ogg"
)

func (BuiltinAudioClip) Path ¶

func (b BuiltinAudioClip) Path() string

type ChatMessageRunEvent ¶ added in v0.0.5

type ChatMessageRunEvent struct {
	Item *llm.ChatMessage
}

func (*ChatMessageRunEvent) GetCreatedAt ¶ added in v0.0.5

func (e *ChatMessageRunEvent) GetCreatedAt() time.Time

func (*ChatMessageRunEvent) GetItem ¶ added in v0.0.5

func (e *ChatMessageRunEvent) GetItem() llm.ChatItem

func (*ChatMessageRunEvent) RunEventType ¶ added in v0.0.5

func (e *ChatMessageRunEvent) RunEventType() string

type ClientEventPayload ¶

type ClientEventPayload struct {
	Type  string `json:"type"`
	State string `json:"state,omitempty"`
}

type ClientEventsDispatcher ¶

type ClientEventsDispatcher struct {
	// contains filtered or unexported fields
}

ClientEventsDispatcher manages sending Agent states to the LiveKit Room DataChannel and handling inbound RPC and DataChannel requests.

func NewClientEventsDispatcher ¶

func NewClientEventsDispatcher(room *lksdk.Room, session *AgentSession) *ClientEventsDispatcher

func (*ClientEventsDispatcher) Close ¶ added in v0.0.5

func (d *ClientEventsDispatcher) Close()

func (*ClientEventsDispatcher) DispatchAgentState ¶

func (d *ClientEventsDispatcher) DispatchAgentState(state AgentState)

DispatchAgentState emits the given agent state: AgentStateIdle, AgentStateThinking, or AgentStateSpeaking.

func (*ClientEventsDispatcher) DispatchUserState ¶

func (d *ClientEventsDispatcher) DispatchUserState(state UserState)

DispatchUserState emits the given user state: UserStateListening or UserStateSpeaking.

func (*ClientEventsDispatcher) RegisterTextInput ¶ added in v0.0.5

func (d *ClientEventsDispatcher) RegisterTextInput(cb TextInputCallback)

type CloseEvent ¶

type CloseEvent struct {
	Reason    CloseReason `json:"reason"`
	Error     error       `json:"error,omitempty"`
	CreatedAt time.Time   `json:"created_at"`
}

func (*CloseEvent) GetType ¶

func (e *CloseEvent) GetType() string

type CloseReason ¶

type CloseReason string

const (
	CloseReasonError                   CloseReason = "error"
	CloseReasonJobShutdown             CloseReason = "job_shutdown"
	CloseReasonParticipantDisconnected CloseReason = "participant_disconnected"
	CloseReasonUserInitiated           CloseReason = "user_initiated"
	CloseReasonTaskCompleted           CloseReason = "task_completed"
)

type CommitUserTurnOpts ¶ added in v0.0.5

type CommitUserTurnOpts struct {
	AudioDetached     bool
	TranscriptTimeout time.Duration
	STTFlushDuration  time.Duration
	SkipReply         bool
}

type ConversationItemAddedEvent ¶

type ConversationItemAddedEvent struct {
	Item      llm.ChatItem `json:"item"`
	CreatedAt time.Time    `json:"created_at"`
}

func (*ConversationItemAddedEvent) GetType ¶

func (e *ConversationItemAddedEvent) GetType() string

type DataStreamIO ¶

type DataStreamIO struct {
	// contains filtered or unexported fields
}

func NewDataStreamIO ¶

func NewDataStreamIO(room *lksdk.Room) *DataStreamIO

func (*DataStreamIO) SendAvatarData ¶

func (io *DataStreamIO) SendAvatarData(ctx context.Context, data []byte) error

type DtmfEvent ¶

type DtmfEvent struct {
	Digit string
	Time  time.Time
}

type EndOfTurnInfo ¶

type EndOfTurnInfo struct {
	SkipReply            bool
	TranscriptTimeout    time.Duration
	STTFlushDuration     time.Duration
	NewTranscript        string
	TranscriptConfidence float64
	StartedSpeakingAt    *float64
	StoppedSpeakingAt    *float64
}

type ErrorEvent ¶ added in v0.0.5

type ErrorEvent struct {
	Error     error     `json:"error"`
	Source    any       `json:"source,omitempty"`
	CreatedAt time.Time `json:"created_at"`
}

func (*ErrorEvent) GetType ¶ added in v0.0.5

func (e *ErrorEvent) GetType() string

type EvaluationResult ¶

type EvaluationResult struct {
	Judgments map[string]string
}

type Event ¶

type Event interface {
	GetType() string
}

type EventTimeline ¶ added in v0.0.5

type EventTimeline struct {
	OnEvent func(ev *AgentEvent)
	// contains filtered or unexported fields
}

func NewEventTimeline ¶ added in v0.0.5

func NewEventTimeline() *EventTimeline

func (*EventTimeline) AddEvent ¶ added in v0.0.5

func (t *EventTimeline) AddEvent(ev Event)

func (*EventTimeline) Clear ¶ added in v0.0.5

func (t *EventTimeline) Clear()

Clear releases all stored events and the OnEvent callback so the timeline (and everything it references) can be garbage-collected.

func (*EventTimeline) Snapshot ¶ added in v0.0.5

func (t *EventTimeline) Snapshot() []*AgentEvent

type FunctionCallOutputRunEvent ¶ added in v0.0.5

type FunctionCallOutputRunEvent struct {
	Item *llm.FunctionCallOutput
}

func (*FunctionCallOutputRunEvent) GetCreatedAt ¶ added in v0.0.5

func (e *FunctionCallOutputRunEvent) GetCreatedAt() time.Time

func (*FunctionCallOutputRunEvent) GetItem ¶ added in v0.0.5

func (e *FunctionCallOutputRunEvent) GetItem() llm.ChatItem

func (*FunctionCallOutputRunEvent) RunEventType ¶ added in v0.0.5

func (e *FunctionCallOutputRunEvent) RunEventType() string

type FunctionCallRunEvent ¶ added in v0.0.5

type FunctionCallRunEvent struct {
	Item *llm.FunctionCall
}

func (*FunctionCallRunEvent) GetCreatedAt ¶ added in v0.0.5

func (e *FunctionCallRunEvent) GetCreatedAt() time.Time

func (*FunctionCallRunEvent) GetItem ¶ added in v0.0.5

func (e *FunctionCallRunEvent) GetItem() llm.ChatItem

func (*FunctionCallRunEvent) RunEventType ¶ added in v0.0.5

func (e *FunctionCallRunEvent) RunEventType() string

type FunctionToolsExecutedEvent ¶ added in v0.0.5

type FunctionToolsExecutedEvent struct {
	FunctionCalls       []llm.FunctionCall        `json:"function_calls"`
	FunctionCallOutputs []*llm.FunctionCallOutput `json:"function_call_outputs"`
	CreatedAt           time.Time                 `json:"created_at"`
	HasToolReply        bool                      `json:"has_tool_reply"`
	HasAgentHandoff     bool                      `json:"has_agent_handoff"`
}

func (*FunctionToolsExecutedEvent) GetType ¶ added in v0.0.5

func (e *FunctionToolsExecutedEvent) GetType() string

type GenerateReplyOpts ¶ added in v0.0.5

type GenerateReplyOpts struct {
	AllowInterruptions bool
}

type GetAgentInfoResponse ¶ added in v0.0.5

type GetAgentInfoResponse struct {
	ID           string         `json:"id"`
	Instructions string         `json:"instructions,omitempty"`
	Tools        []string       `json:"tools"`
	ChatCtx      []llm.ChatItem `json:"chat_ctx"`
}

type GetChatHistoryResponse ¶ added in v0.0.5

type GetChatHistoryResponse struct {
	Items []llm.ChatItem `json:"items"`
}

type GetSessionStateResponse ¶ added in v0.0.5

type GetSessionStateResponse struct {
	AgentState string         `json:"agent_state"`
	UserState  string         `json:"user_state"`
	AgentID    string         `json:"agent_id"`
	Options    map[string]any `json:"options"`
	CreatedAt  float64        `json:"created_at"`
}

type IVRActivity ¶

type IVRActivity struct {
	AgentIntf AgentInterface
	Agent     *Agent
	// contains filtered or unexported fields
}

func NewIVRActivity ¶

func NewIVRActivity(agentIntf AgentInterface) *IVRActivity

func (*IVRActivity) OnDtmf ¶

func (i *IVRActivity) OnDtmf(digit string)

func (*IVRActivity) SetDigitCallback ¶

func (i *IVRActivity) SetDigitCallback(timeout time.Duration, cb func(buffer string) (bool, error))

func (*IVRActivity) Start ¶

func (i *IVRActivity) Start()

func (*IVRActivity) Stop ¶

func (i *IVRActivity) Stop()

type InputDetails ¶

type InputDetails struct {
	Modality string
}

func DefaultInputDetails ¶

func DefaultInputDetails() InputDetails

type JobResult ¶ added in v0.0.5

type JobResult interface {
	Wait(ctx context.Context) error
	GetEvents() []RunEvent
}

type LLMGenerationData ¶

type LLMGenerationData struct {
	TextCh        chan string
	FunctionCh    chan *llm.FunctionToolCall
	FullTextCh    chan string // receives the complete assembled text when streaming is done
	GeneratedText string
	Usage         *llm.CompletionUsage
}

func PerformLLMInference ¶

func PerformLLMInference(ctx context.Context, l llm.LLM, chatCtx *llm.ChatContext, tools []interface{}) (*LLMGenerationData, error)

type LLMNodeFunc ¶ added in v0.0.5

type LLMNodeFunc func(ctx context.Context, l llm.LLM, chatCtx *llm.ChatContext, tools []interface{}) (*LLMGenerationData, error)

type LLMTurnDetector ¶

type LLMTurnDetector struct {
	// contains filtered or unexported fields
}

LLMTurnDetector uses an LLM to predict if the user has finished speaking. It sends the recent conversation history to the LLM and asks for a probability score.

func NewLLMTurnDetector ¶

func NewLLMTurnDetector(llmInstance llm.LLM) *LLMTurnDetector

func (*LLMTurnDetector) PredictEndOfTurn ¶

func (m *LLMTurnDetector) PredictEndOfTurn(ctx context.Context, chatCtx *llm.ChatContext) (float64, error)

type MediaPublisher ¶ added in v0.0.5

type MediaPublisher interface {
	Identity() string
	PublishData(data []byte, topic string, destinationSIDs []string) error
	SetAttributes(attrs map[string]string) error
}

type MetricsCollectedEvent ¶

type MetricsCollectedEvent struct {
	Metrics   telemetry.AgentMetrics `json:"metrics"`
	CreatedAt time.Time              `json:"created_at"`
}

func (*MetricsCollectedEvent) GetType ¶

func (e *MetricsCollectedEvent) GetType() string

type MultimodalAgent ¶

type MultimodalAgent struct {
	PublishAudio func(frame *model.AudioFrame) error
	// contains filtered or unexported fields
}

func NewMultimodalAgent ¶

func NewMultimodalAgent(
	m llm.RealtimeModel,
	chatCtx *llm.ChatContext,
) *MultimodalAgent

func (*MultimodalAgent) OnAudioFrame ¶

func (ma *MultimodalAgent) OnAudioFrame(ctx context.Context, frame *model.AudioFrame)

func (*MultimodalAgent) Start ¶

func (ma *MultimodalAgent) Start(ctx context.Context, s *AgentSession) error

type ParticipantActiveEvent ¶ added in v0.0.5

type ParticipantActiveEvent struct {
	ParticipantID string    `json:"participant_id"`
	Identity      string    `json:"identity"`
	Active        bool      `json:"active"`
	CreatedAt     time.Time `json:"created_at"`
}

func (*ParticipantActiveEvent) GetType ¶ added in v0.0.5

func (e *ParticipantActiveEvent) GetType() string

type ParticipantReference ¶ added in v0.0.5

type ParticipantReference interface {
	Identity() string
	SID() string
}

type PipelineAgent ¶

type PipelineAgent struct {
	LLM llm.LLM
	// contains filtered or unexported fields
}

func NewPipelineAgent ¶

func NewPipelineAgent(
	vad vad.VAD,
	sttInstance stt.STT,
	llmObj llm.LLM,
	ttsInstance tts.TTS,
	chatCtx *llm.ChatContext,
) *PipelineAgent

func (*PipelineAgent) GenerateReply ¶ added in v0.0.5

func (va *PipelineAgent) GenerateReply(speech *SpeechHandle)

func (*PipelineAgent) OnAudioFrame ¶

func (va *PipelineAgent) OnAudioFrame(ctx context.Context, frame *model.AudioFrame)

func (*PipelineAgent) Start ¶

func (va *PipelineAgent) Start(ctx context.Context, s *AgentSession) error

func (*PipelineAgent) Stop ¶ added in v0.0.5

func (va *PipelineAgent) Stop()

type PlayHandle ¶

type PlayHandle struct {
	// contains filtered or unexported fields
}

func (*PlayHandle) Done ¶

func (h *PlayHandle) Done() bool

func (*PlayHandle) Stop ¶

func (h *PlayHandle) Stop()

func (*PlayHandle) WaitForPlayout ¶

func (h *PlayHandle) WaitForPlayout()

type PlaybackFinishedEvent ¶ added in v0.0.5

type PlaybackFinishedEvent struct {
	PlaybackPosition       time.Duration
	Interrupted            bool
	SynchronizedTranscript string
}

type PlaybackStartedEvent ¶ added in v0.0.5

type PlaybackStartedEvent struct {
	CreatedAt time.Time
}

type Plugin ¶

type Plugin interface {
	Title() string
	Version() string
	Package() string
	DownloadFiles() error
}

func RegisteredPlugins ¶

func RegisteredPlugins() []Plugin

type QueueIO ¶

type QueueIO struct {
	// contains filtered or unexported fields
}

func NewQueueIO ¶

func NewQueueIO() *QueueIO

func (*QueueIO) ReadQueue ¶

func (io *QueueIO) ReadQueue() <-chan []byte

func (*QueueIO) SendAvatarData ¶

func (io *QueueIO) SendAvatarData(ctx context.Context, data []byte) error

type RealtimeAudioOutputNodeFunc ¶ added in v0.0.5

type RealtimeAudioOutputNodeFunc func(ctx context.Context, audio <-chan *model.AudioFrame) (<-chan *model.AudioFrame, error)

type RecognitionHooks ¶

type RecognitionHooks interface {
	OnStartOfSpeech(ev *vad.VADEvent)
	OnEndOfSpeech(ev *vad.VADEvent)
	OnInterimTranscript(ev *stt.SpeechEvent)
	OnFinalTranscript(ev *stt.SpeechEvent)
}

type RecordingOptions ¶

type RecordingOptions struct {
	Audio      bool `json:"audio"`
	Traces     bool `json:"traces"`
	Logs       bool `json:"logs"`
	Transcript bool `json:"transcript"`
}

type RunAssert ¶

type RunAssert struct {
	ChatCtx *llm.ChatContext
	// contains filtered or unexported fields
}

func (*RunAssert) ContainsMessage ¶

func (a *RunAssert) ContainsMessage(role llm.ChatRole, content string) *RunAssert

func (*RunAssert) HasError ¶

func (a *RunAssert) HasError() error

func (*RunAssert) IsFunctionCall ¶

func (a *RunAssert) IsFunctionCall(name string) *RunAssert

func (*RunAssert) Judge ¶

func (a *RunAssert) Judge(ctx context.Context, evaluator evals.Evaluator, llmInstance llm.LLM) (*RunAssert, error)

type RunContext ¶

type RunContext struct {
	Session      *AgentSession
	SpeechHandle *SpeechHandle
	FunctionCall *llm.FunctionCall
}

func GetRunContext ¶

func GetRunContext(ctx context.Context) *RunContext

func (*RunContext) WaitForPlayout ¶

func (r *RunContext) WaitForPlayout(ctx context.Context) error

type RunEvent ¶ added in v0.0.5

type RunEvent interface {
	RunEventType() string
	GetCreatedAt() time.Time
	GetItem() llm.ChatItem
}

type RunResult ¶

type RunResult[T any] struct {
	ChatCtx   *llm.ChatContext
	Timestamp float64
	Expect    *RunAssert

	FinalOutput T

	Events []RunEvent
	// contains filtered or unexported fields
}

func GenerateTypedReply ¶ added in v0.0.5

func GenerateTypedReply[T any](ctx context.Context, s *AgentSession, userInput string, opts *GenerateReplyOpts) (*RunResult[T], error)

func NewRunResult ¶

func NewRunResult[T any](chatCtx *llm.ChatContext) *RunResult[T]

func (*RunResult[T]) AddEvent ¶ added in v0.0.5

func (r *RunResult[T]) AddEvent(ev RunEvent)

func (*RunResult[T]) Done ¶ added in v0.0.5

func (r *RunResult[T]) Done() <-chan struct{}

func (*RunResult[T]) Eval ¶ added in v0.0.5

func (r *RunResult[T]) Eval(ctx context.Context, evaluator evals.Evaluator, llmInstance llm.LLM) (*evals.JudgmentResult, error)

func (*RunResult[T]) GetEvents ¶ added in v0.0.5

func (r *RunResult[T]) GetEvents() []RunEvent

func (*RunResult[T]) Wait ¶ added in v0.0.5

func (r *RunResult[T]) Wait(ctx context.Context) error

func (*RunResult[T]) WaitAny ¶ added in v0.0.5

func (r *RunResult[T]) WaitAny(ctx context.Context) (T, error)

func (*RunResult[T]) WatchHandle ¶ added in v0.0.5

func (r *RunResult[T]) WatchHandle(ctx context.Context, handle *SpeechHandle)

func (*RunResult[T]) WatchTask ¶ added in v0.0.5

func (r *RunResult[T]) WatchTask(done <-chan struct{})

type RunResultInterface ¶ added in v0.0.5

type RunResultInterface interface {
	AddEvent(ev RunEvent)
	WatchTask(done <-chan struct{})
}

type STTNodeFunc ¶ added in v0.0.5

type STTNodeFunc func(ctx context.Context, s stt.STT, audio <-chan *model.AudioFrame) (<-chan *stt.SpeechEvent, error)

type SendMessageRequest ¶ added in v0.0.5

type SendMessageRequest struct {
	Text string `json:"text"`
}

type SendMessageResponse ¶ added in v0.0.5

type SendMessageResponse struct {
	Items []llm.ChatItem `json:"items"`
}

type SessionInfo ¶ added in v0.0.5

type SessionInfo interface {
	LocalParticipantID() string
}

type SessionReport ¶

type SessionReport struct {
	RecordingOptions        RecordingOptions    `json:"recording_options"`
	JobID                   string              `json:"job_id"`
	RoomID                  string              `json:"room_id"`
	Room                    string              `json:"room"`
	Options                 AgentSessionOptions `json:"options"`
	Events                  []any               `json:"events"`
	Timeline                []*AgentEvent       `json:"timeline,omitempty"`
	ChatHistory             *llm.ChatContext    `json:"chat_history"`
	AudioRecordingPath      *string             `json:"audio_recording_path,omitempty"`
	AudioRecordingStartedAt *float64            `json:"audio_recording_started_at,omitempty"`
	Duration                *float64            `json:"duration,omitempty"`
	StartedAt               *float64            `json:"started_at,omitempty"`
	Timestamp               float64             `json:"timestamp"`
	// contains filtered or unexported fields
}

func NewSessionReport ¶

func NewSessionReport() *SessionReport

func (*SessionReport) AddEvent ¶ added in v0.0.5

func (r *SessionReport) AddEvent(event any)

func (*SessionReport) SetChatHistory ¶ added in v0.0.5

func (r *SessionReport) SetChatHistory(chatCtx *llm.ChatContext)

func (*SessionReport) SetTimeline ¶ added in v0.0.5

func (r *SessionReport) SetTimeline(events []*AgentEvent)

func (*SessionReport) ToDict ¶ added in v0.0.5

func (r *SessionReport) ToDict() map[string]any

type SpeechCreatedEvent ¶

type SpeechCreatedEvent struct {
	UserInitiated bool          `json:"user_initiated"`
	Source        string        `json:"source"`
	SpeechHandle  *SpeechHandle `json:"-"`
	ParticipantID string        `json:"participant_id,omitempty"`
	CreatedAt     time.Time     `json:"created_at"`
}

func (*SpeechCreatedEvent) GetType ¶

func (e *SpeechCreatedEvent) GetType() string

type SpeechHandle ¶

type SpeechHandle struct {
	ID                 string
	AllowInterruptions bool
	InputDetails       InputDetails
	Priority           int
	CreatedAt          time.Time

	FinalOutput any
	ManualText  string

	OnItemAdded func(item llm.ChatItem)
	RunResult   RunResultInterface
	// contains filtered or unexported fields
}

func NewSpeechHandle ¶

func NewSpeechHandle(allowInterruptions bool, inputDetails InputDetails) *SpeechHandle

func (*SpeechHandle) Error ¶ added in v0.0.5

func (s *SpeechHandle) Error() error

func (*SpeechHandle) Interrupt ¶

func (s *SpeechHandle) Interrupt(force bool) error

func (*SpeechHandle) IsDone ¶

func (s *SpeechHandle) IsDone() bool

func (*SpeechHandle) IsInterrupted ¶

func (s *SpeechHandle) IsInterrupted() bool

func (*SpeechHandle) IsScheduled ¶

func (s *SpeechHandle) IsScheduled() bool

func (*SpeechHandle) MarkDone ¶

func (s *SpeechHandle) MarkDone()

func (*SpeechHandle) MarkDoneWithError ¶ added in v0.0.5

func (s *SpeechHandle) MarkDoneWithError(err error)

func (*SpeechHandle) MarkScheduled ¶

func (s *SpeechHandle) MarkScheduled()

func (*SpeechHandle) Wait ¶

func (s *SpeechHandle) Wait(ctx context.Context) error

type StreamRequest ¶ added in v0.0.5

type StreamRequest struct {
	ID      string `json:"id"`
	Method  string `json:"method"`
	Payload string `json:"payload"`
}

type StreamResponse ¶ added in v0.0.5

type StreamResponse struct {
	ID      string `json:"id"`
	Payload string `json:"payload,omitempty"`
	Error   string `json:"error,omitempty"`
}

type SyncEvent ¶ added in v0.0.5

type SyncEvent struct {
	Text      string
	Flush     bool
	SegmentID string
}

type SyncedAudioOutput ¶ added in v0.0.5

type SyncedAudioOutput struct {
	// contains filtered or unexported fields
}

SyncedAudioOutput wraps an AudioOutput and pushes frames to the synchronizer.

func NewSyncedAudioOutput ¶ added in v0.0.5

func NewSyncedAudioOutput(sync *TranscriptSynchronizer, next AudioOutput) *SyncedAudioOutput

func (*SyncedAudioOutput) CaptureFrame ¶ added in v0.0.5

func (s *SyncedAudioOutput) CaptureFrame(frame *model.AudioFrame) error

func (*SyncedAudioOutput) ClearBuffer ¶ added in v0.0.5

func (s *SyncedAudioOutput) ClearBuffer()

func (*SyncedAudioOutput) Flush ¶ added in v0.0.5

func (s *SyncedAudioOutput) Flush()

func (*SyncedAudioOutput) Label ¶ added in v0.0.5

func (s *SyncedAudioOutput) Label() string

func (*SyncedAudioOutput) OnAttached ¶ added in v0.0.5

func (s *SyncedAudioOutput) OnAttached()

func (*SyncedAudioOutput) OnDetached ¶ added in v0.0.5

func (s *SyncedAudioOutput) OnDetached()

func (*SyncedAudioOutput) OnPlaybackFinished ¶ added in v0.0.5

func (s *SyncedAudioOutput) OnPlaybackFinished(f func(ev PlaybackFinishedEvent))

func (*SyncedAudioOutput) OnPlaybackStarted ¶ added in v0.0.5

func (s *SyncedAudioOutput) OnPlaybackStarted(f func(ev PlaybackStartedEvent))

func (*SyncedAudioOutput) Pause ¶ added in v0.0.5

func (s *SyncedAudioOutput) Pause()

func (*SyncedAudioOutput) Resume ¶ added in v0.0.5

func (s *SyncedAudioOutput) Resume()

func (*SyncedAudioOutput) WaitForPlayout ¶ added in v0.0.5

func (s *SyncedAudioOutput) WaitForPlayout(ctx context.Context) error

type SyncedTextOutput ¶ added in v0.0.5

type SyncedTextOutput struct {
	// contains filtered or unexported fields
}

SyncedTextOutput wraps a TextOutput and pushes text to the synchronizer.

func NewSyncedTextOutput ¶ added in v0.0.5

func NewSyncedTextOutput(sync *TranscriptSynchronizer, next TextOutput) *SyncedTextOutput

func (*SyncedTextOutput) CaptureText ¶ added in v0.0.5

func (s *SyncedTextOutput) CaptureText(text string) error

func (*SyncedTextOutput) Flush ¶ added in v0.0.5

func (s *SyncedTextOutput) Flush()

func (*SyncedTextOutput) Label ¶ added in v0.0.5

func (s *SyncedTextOutput) Label() string

func (*SyncedTextOutput) OnAttached ¶ added in v0.0.5

func (s *SyncedTextOutput) OnAttached()

func (*SyncedTextOutput) OnDetached ¶ added in v0.0.5

func (s *SyncedTextOutput) OnDetached()

func (*SyncedTextOutput) SetSegmentID ¶ added in v0.0.5

func (s *SyncedTextOutput) SetSegmentID(id string)

type TTSGenerationData ¶

type TTSGenerationData struct {
	AudioCh       chan *model.AudioFrame
	AlignedTextCh chan string
	TTFB          time.Duration
}

func PerformTTSInference ¶

func PerformTTSInference(ctx context.Context, t tts.TTS, textCh <-chan string) (*TTSGenerationData, error)

type TTSNodeFunc ¶ added in v0.0.5

type TTSNodeFunc func(ctx context.Context, t tts.TTS, textCh <-chan string) (*TTSGenerationData, error)

type Tagger ¶

type Tagger struct {
	// contains filtered or unexported fields
}

func NewTagger ¶

func NewTagger() *Tagger

func (*Tagger) Add ¶

func (t *Tagger) Add(tag string)

func (*Tagger) Evaluation ¶

func (t *Tagger) Evaluation(result *EvaluationResult)

func (*Tagger) Fail ¶

func (t *Tagger) Fail(reason string)

func (*Tagger) OutcomeReason ¶

func (t *Tagger) OutcomeReason() string

func (*Tagger) Remove ¶

func (t *Tagger) Remove(tag string)

func (*Tagger) Success ¶

func (t *Tagger) Success(reason string)

func (*Tagger) Tags ¶

func (t *Tagger) Tags() []string

type TaskWaiter ¶

type TaskWaiter interface {
	WaitAny(ctx context.Context) (any, error)
}

type TextInput ¶ added in v0.0.5

type TextInput interface {
	Label() string
	OnAttached()
	OnDetached()
}

TextInput represents a source of text (e.g., chat messages or remote text tracks).

type TextInputCallback ¶ added in v0.0.5

type TextInputCallback func(s *AgentSession, ev TextInputEvent) error

type TextInputEvent ¶ added in v0.0.5

type TextInputEvent struct {
	Text        string            `json:"text"`
	Participant lksdk.Participant `json:"-"`
}

type TextOutput ¶ added in v0.0.5

type TextOutput interface {
	Label() string
	CaptureText(text string) error
	SetSegmentID(id string)
	Flush()
	OnAttached()
	OnDetached()
}

TextOutput represents a destination for text (e.g., transcriptions).

type ToolExecutionOutput ¶

type ToolExecutionOutput struct {
	FncCall       llm.FunctionCall
	FncCallOut    *llm.FunctionCallOutput
	RawOutput     any
	RawError      error
	ReplyRequired bool
	AgentTask     AgentInterface
}

type TranscriptSynchronizer ¶

type TranscriptSynchronizer struct {
	// contains filtered or unexported fields
}

TranscriptSynchronizer drip-feeds text to match the playout speed of audio.

func NewTranscriptSynchronizer ¶

func NewTranscriptSynchronizer(speakingRate float64, refreshRate time.Duration) *TranscriptSynchronizer

NewTranscriptSynchronizer initializes the synchronizer. Default speaking rate is usually ~3.83 syllables/sec.

func (*TranscriptSynchronizer) Close ¶

func (s *TranscriptSynchronizer) Close()

func (*TranscriptSynchronizer) EventCh ¶

func (s *TranscriptSynchronizer) EventCh() <-chan SyncEvent

func (*TranscriptSynchronizer) Interrupt ¶

func (s *TranscriptSynchronizer) Interrupt()

func (*TranscriptSynchronizer) PushAudio ¶

func (s *TranscriptSynchronizer) PushAudio(frame *model.AudioFrame)

func (*TranscriptSynchronizer) PushText ¶

func (s *TranscriptSynchronizer) PushText(text string)

func (*TranscriptSynchronizer) RotateSegment ¶ added in v0.0.5

func (s *TranscriptSynchronizer) RotateSegment()

RotateSegment flushes the remaining text buffer and resets the time accumulators for a new audio segment.

func (*TranscriptSynchronizer) SetSegmentID ¶ added in v0.0.5

func (s *TranscriptSynchronizer) SetSegmentID(id string)

type TranscriptionFilter ¶

type TranscriptionFilter struct {
	SpeakingRate float64
}

func NewTranscriptionFilter ¶

func NewTranscriptionFilter() *TranscriptionFilter

type TranscriptionNodeFunc ¶ added in v0.0.5

type TranscriptionNodeFunc func(ctx context.Context, textCh <-chan string) (<-chan string, error)

type TransitionActivityAction ¶ added in v0.0.5

type TransitionActivityAction string

const (
	TransitionActivityClose  TransitionActivityAction = "close"
	TransitionActivityPause  TransitionActivityAction = "pause"
	TransitionActivityStart  TransitionActivityAction = "start"
	TransitionActivityResume TransitionActivityAction = "resume"
)

type TurnDetectionMode ¶

type TurnDetectionMode string

const (
	TurnDetectionModeSTT         TurnDetectionMode = "stt"
	TurnDetectionModeVAD         TurnDetectionMode = "vad"
	TurnDetectionModeRealtimeLLM TurnDetectionMode = "realtime_llm"
	TurnDetectionModeManual      TurnDetectionMode = "manual"
)

type TurnDetector ¶

type TurnDetector interface {
	PredictEndOfTurn(ctx context.Context, chatCtx *llm.ChatContext) (float64, error)
}

type UpdateAgentOpts ¶ added in v0.0.5

type UpdateAgentOpts struct {
	PreviousActivity TransitionActivityAction
	NewActivity      TransitionActivityAction
}

type UserInputTranscribedEvent ¶

type UserInputTranscribedEvent struct {
	Transcript string    `json:"transcript"`
	IsFinal    bool      `json:"is_final"`
	SpeakerID  string    `json:"speaker_id,omitempty"`
	Language   string    `json:"language,omitempty"`
	CreatedAt  time.Time `json:"created_at"`
}

func (*UserInputTranscribedEvent) GetType ¶

func (e *UserInputTranscribedEvent) GetType() string

type UserState ¶

type UserState string

UserState is a string-based discriminator type for user states.

const (
	UserStateSpeaking  UserState = "speaking"
	UserStateListening UserState = "listening"
	UserStateAway      UserState = "away"
)

type UserStateChangedEvent ¶

type UserStateChangedEvent struct {
	OldState  UserState `json:"old_state"`
	NewState  UserState `json:"new_state"`
	CreatedAt time.Time `json:"created_at"`
}

func (*UserStateChangedEvent) GetType ¶ added in v0.0.5

func (e *UserStateChangedEvent) GetType() string

type VideoGenerator ¶ added in v0.0.5

type VideoGenerator interface {
	PushAudio(frame *model.AudioFrame) error
	Stream() <-chan interface{} // Yields *model.AudioFrame, *model.VideoFrame, or *model.AudioSegmentEnd
	ClearBuffer() error
	Close() error
}

type VideoInput ¶ added in v0.0.5

type VideoInput interface {
	Label() string
	Stream() <-chan *model.VideoFrame
	OnAttached()
	OnDetached()
}

VideoInput represents a source of video frames (e.g., a camera or a remote track).

type VideoNodeFunc ¶ added in v0.0.5

type VideoNodeFunc func(ctx context.Context, video <-chan *model.VideoFrame) error

type VideoOutput ¶ added in v0.0.5

type VideoOutput interface {
	Label() string
	CaptureVideoFrame(frame *model.VideoFrame) error
	Flush()
	OnAttached()
	OnDetached()
}

VideoOutput represents a destination for video frames (e.g., a screen or a remote track).

type VoiceActivityVideoSampler ¶

type VoiceActivityVideoSampler struct {
	// contains filtered or unexported fields
}

VoiceActivityVideoSampler samples video frames at a reduced rate (e.g. 1 fps) only when the user is speaking, to reduce LLM context token usage.

func NewVoiceActivityVideoSampler ¶

func NewVoiceActivityVideoSampler(session *AgentSession, sampleRate float64, opts images.EncodeOptions) *VoiceActivityVideoSampler

func (*VoiceActivityVideoSampler) OnVideoFrame ¶

func (s *VoiceActivityVideoSampler) OnVideoFrame(ctx context.Context, frame *images.VideoFrame) bool

OnVideoFrame should be called for every incoming WebRTC video frame. It returns true if the frame should be forwarded to the LLM.

func (*VoiceActivityVideoSampler) SetSpeaking ¶

func (s *VoiceActivityVideoSampler) SetSpeaking(speaking bool)

Directories ¶

Path	Synopsis
ivr

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL