agent

package
v0.0.5 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 22, 2026 License: MIT Imports: 40 Imported by: 0

Documentation

Index

Constants

View Source
const (
	TopicAgentRequest  = "lk.agent.request"
	TopicAgentResponse = "lk.agent.response"
	TopicChat          = "lk.chat"
)
View Source
const (
	SpeechPriorityLow    = 0
	SpeechPriorityNormal = 5
	SpeechPriorityHigh   = 10
	InterruptionTimeout  = 5 * time.Second
)
View Source
const TopicClientEvents = "lk-agent-client-events"

Variables

This section is empty.

Functions

func DefaultTextInputCallback added in v0.0.5

func DefaultTextInputCallback(s *AgentSession, ev TextInputEvent) error

func GetOutput added in v0.0.5

func GetOutput[T any](ctx context.Context, r *RunResult[T]) (T, error)

GetOutput returns the strictly typed final output of the run, blocking until completion.

func PerformToolExecutions

func PerformToolExecutions(
	ctx context.Context,
	functionCh <-chan *llm.FunctionToolCall,
	toolCtx *llm.ToolContext,
) <-chan ToolExecutionOutput

func RegisterPlugin

func RegisterPlugin(p Plugin)

func UploadSessionReport

func UploadSessionReport(
	cloudURL string,
	apiKey string,
	apiSecret string,
	agentName string,
	report *SessionReport,
) error

func WithRunContext

func WithRunContext(ctx context.Context, rc *RunContext) context.Context

Types

type AVSynchronizer added in v0.0.5

type AVSynchronizer interface {
	Push(frame interface{}) error
	Close() error
}

type Agent

type Agent struct {
	ID           string
	Instructions string
	ChatCtx      *llm.ChatContext
	Tools        []interface{}

	TurnDetection TurnDetectionMode
	TurnDetector  TurnDetector
	STT           stt.STT
	VAD           vad.VAD
	LLM           llm.LLM
	TTS           tts.TTS

	LLMNode                 LLMNodeFunc
	TTSNode                 TTSNodeFunc
	STTNode                 STTNodeFunc
	VideoNode               VideoNodeFunc
	TranscriptionNode       TranscriptionNodeFunc
	RealtimeAudioOutputNode RealtimeAudioOutputNodeFunc

	AllowInterruptions        bool
	MinConsecutiveSpeechDelay float64
	UseTTSAlignedTranscript   bool
	MinEndpointingDelay       float64
	MaxEndpointingDelay       float64
	// contains filtered or unexported fields
}

func NewAgent

func NewAgent(instructions string) *Agent

func (*Agent) GetActivity

func (a *Agent) GetActivity() *AgentActivity

func (*Agent) GetAgent

func (a *Agent) GetAgent() *Agent

func (*Agent) OnEnter

func (a *Agent) OnEnter(ctx context.Context) error

func (*Agent) OnExit

func (a *Agent) OnExit(ctx context.Context) error

func (*Agent) OnUserTurnCompleted

func (a *Agent) OnUserTurnCompleted(ctx context.Context, chatCtx *llm.ChatContext, newMsg *llm.ChatMessage) error

func (*Agent) Start

func (a *Agent) Start(session *AgentSession, agentIntf AgentInterface) *AgentActivity

func (*Agent) UpdateInstructions

func (a *Agent) UpdateInstructions(ctx context.Context, instructions string) error

func (*Agent) UpdateTools

func (a *Agent) UpdateTools(ctx context.Context, tools []interface{}) error

type AgentActivity

type AgentActivity struct {
	AgentIntf AgentInterface
	Agent     *Agent
	Session   *AgentSession
	// contains filtered or unexported fields
}

AgentActivity handles the internal event loops, I/O processing, and speech generation queue for an Agent.

func NewAgentActivity

func NewAgentActivity(agentIntf AgentInterface, session *AgentSession, parentCtx context.Context) *AgentActivity

func (*AgentActivity) AClose added in v0.0.5

func (a *AgentActivity) AClose()

func (*AgentActivity) CaptureVideoFrame added in v0.0.5

func (a *AgentActivity) CaptureVideoFrame(frame *model.VideoFrame) error

func (*AgentActivity) ClearUserTurn added in v0.0.5

func (a *AgentActivity) ClearUserTurn()

func (*AgentActivity) CommitUserTurn added in v0.0.5

func (a *AgentActivity) CommitUserTurn(opts *CommitUserTurnOpts)

func (*AgentActivity) Drain added in v0.0.5

func (a *AgentActivity) Drain(ctx context.Context) error

func (*AgentActivity) Interrupt added in v0.0.5

func (a *AgentActivity) Interrupt(force bool) error

func (*AgentActivity) OnEndOfSpeech

func (a *AgentActivity) OnEndOfSpeech(ev *vad.VADEvent)

func (*AgentActivity) OnFinalTranscript

func (a *AgentActivity) OnFinalTranscript(ev *stt.SpeechEvent)

func (*AgentActivity) OnInterimTranscript added in v0.0.5

func (a *AgentActivity) OnInterimTranscript(ev *stt.SpeechEvent)

func (*AgentActivity) OnStartOfSpeech

func (a *AgentActivity) OnStartOfSpeech(ev *vad.VADEvent)

OnStartOfSpeech is one of the event callbacks from RecognitionHooks.

func (*AgentActivity) Pause added in v0.0.5

func (a *AgentActivity) Pause() error

func (*AgentActivity) PauseScheduling added in v0.0.5

func (a *AgentActivity) PauseScheduling()

func (*AgentActivity) PushAudio added in v0.0.5

func (a *AgentActivity) PushAudio(frame *model.AudioFrame) error

func (*AgentActivity) PushVideo added in v0.0.5

func (a *AgentActivity) PushVideo(frame *model.VideoFrame) error

func (*AgentActivity) Resume added in v0.0.5

func (a *AgentActivity) Resume() error

func (*AgentActivity) ResumeScheduling added in v0.0.5

func (a *AgentActivity) ResumeScheduling()

func (*AgentActivity) ScheduleSpeech

func (a *AgentActivity) ScheduleSpeech(speech *SpeechHandle, priority int, force bool) error

func (*AgentActivity) Start

func (a *AgentActivity) Start()

func (*AgentActivity) Stop

func (a *AgentActivity) Stop()

func (*AgentActivity) UpdateOptions added in v0.0.5

func (a *AgentActivity) UpdateOptions(opts AgentSessionOptions)

type AgentEvent added in v0.0.5

type AgentEvent struct {
	Type      string  `json:"type"`
	Timestamp float64 `json:"timestamp"`

	UserStateChanged       *UserStateChangedEvent       `json:"user_state_changed,omitempty"`
	AgentStateChanged      *AgentStateChangedEvent      `json:"agent_state_changed,omitempty"`
	UserInputTranscribed   *UserInputTranscribedEvent   `json:"user_input_transcribed,omitempty"`
	AgentFalseInterruption *AgentFalseInterruptionEvent `json:"agent_false_interruption,omitempty"`
	MetricsCollected       *MetricsCollectedEvent       `json:"metrics_collected,omitempty"`
	ConversationItemAdded  *ConversationItemAddedEvent  `json:"conversation_item_added,omitempty"`
	FunctionToolsExecuted  *FunctionToolsExecutedEvent  `json:"function_tools_executed,omitempty"`
	AgentHandoff           *AgentHandoffEvent           `json:"agent_handoff,omitempty"`
	SpeechCreated          *SpeechCreatedEvent          `json:"speech_created,omitempty"`
	Error                  *ErrorEvent                  `json:"error,omitempty"`
	Close                  *CloseEvent                  `json:"close,omitempty"`
	ParticipantActive      *ParticipantActiveEvent      `json:"participant_active,omitempty"`
}

func NewAgentEvent added in v0.0.5

func NewAgentEvent(ev Event) *AgentEvent

func (*AgentEvent) MarshalJSON added in v0.0.5

func (ae *AgentEvent) MarshalJSON() ([]byte, error)

func (*AgentEvent) UnmarshalJSON added in v0.0.5

func (ae *AgentEvent) UnmarshalJSON(data []byte) error

type AgentFalseInterruptionEvent added in v0.0.5

type AgentFalseInterruptionEvent struct {
	Resumed   bool      `json:"resumed"`
	CreatedAt time.Time `json:"created_at"`
}

func (*AgentFalseInterruptionEvent) GetType added in v0.0.5

func (e *AgentFalseInterruptionEvent) GetType() string

type AgentHandoffEvent added in v0.0.5

type AgentHandoffEvent struct {
	OldAgent   AgentInterface    `json:"-"`
	NewAgent   AgentInterface    `json:"-"`
	OldAgentID string            `json:"old_agent_id"`
	NewAgentID string            `json:"new_agent_id"`
	Handoff    *llm.AgentHandoff `json:"handoff"`
	CreatedAt  time.Time         `json:"created_at"`
}

func (*AgentHandoffEvent) GetType added in v0.0.5

func (e *AgentHandoffEvent) GetType() string

type AgentHandoffRunEvent added in v0.0.5

type AgentHandoffRunEvent struct {
	Item     *llm.AgentHandoff
	OldAgent AgentInterface
	NewAgent AgentInterface
}

func (*AgentHandoffRunEvent) GetCreatedAt added in v0.0.5

func (e *AgentHandoffRunEvent) GetCreatedAt() time.Time

func (*AgentHandoffRunEvent) GetItem added in v0.0.5

func (e *AgentHandoffRunEvent) GetItem() llm.ChatItem

func (*AgentHandoffRunEvent) RunEventType added in v0.0.5

func (e *AgentHandoffRunEvent) RunEventType() string

type AgentInput added in v0.0.5

type AgentInput struct {
	Audio AudioInput
	Text  TextInput
	Video VideoInput
}

type AgentInterface

type AgentInterface interface {
	OnEnter(ctx context.Context) error
	OnExit(ctx context.Context) error
	OnUserTurnCompleted(ctx context.Context, chatCtx *llm.ChatContext, newMsg *llm.ChatMessage) error
	GetAgent() *Agent
	GetActivity() *AgentActivity
}

type AgentOutput added in v0.0.5

type AgentOutput struct {
	Audio         AudioOutput
	Transcription TextOutput
	Video         VideoOutput
	Publisher     MediaPublisher
}

type AgentSession

type AgentSession struct {
	Options AgentSessionOptions

	ChatCtx   *llm.ChatContext
	Agent     AgentInterface
	STT       stt.STT
	VAD       vad.VAD
	LLM       llm.LLM
	TTS       tts.TTS
	Tools     []interface{}
	Assistant *PipelineAgent
	Room      *lksdk.Room

	Input  AgentInput
	Output AgentOutput

	MetricsCollector *telemetry.UsageCollector
	Timeline         *EventTimeline

	UserState  UserState
	AgentState AgentState

	// Transcript attribution — set by RoomIO when tracks are established.
	RemoteUserIdentity string
	RemoteTrackSID     string
	AgentTrackSID      string

	Activity *AgentActivity

	// Event channels
	AgentStateChangedCh chan AgentStateChangedEvent
	UserStateChangedCh  chan UserStateChangedEvent
	// contains filtered or unexported fields
}

func NewAgentSession

func NewAgentSession(agent AgentInterface, room *lksdk.Room, opts AgentSessionOptions) *AgentSession

func (*AgentSession) ClearUserTurn added in v0.0.5

func (s *AgentSession) ClearUserTurn()

func (*AgentSession) Close added in v0.0.5

func (s *AgentSession) Close() error

func (*AgentSession) CommitUserTurn added in v0.0.5

func (s *AgentSession) CommitUserTurn(opts *CommitUserTurnOpts)

func (*AgentSession) GenerateReply

func (s *AgentSession) GenerateReply(ctx context.Context, userInput string, allowInterruptions bool) (any, error)

func (*AgentSession) GetAgentTrackSID

func (s *AgentSession) GetAgentTrackSID() string

func (*AgentSession) GetPublisher added in v0.0.5

func (s *AgentSession) GetPublisher() interface {
	Identity() string
	PublishData(data []byte, topic string, destinationSIDs []string) error
}

func (*AgentSession) Interrupt added in v0.0.5

func (s *AgentSession) Interrupt(ctx context.Context) error

func (*AgentSession) OnAudioFrame

func (s *AgentSession) OnAudioFrame(ctx context.Context, frame *model.AudioFrame)

func (*AgentSession) Pause added in v0.0.5

func (s *AgentSession) Pause() error

func (*AgentSession) PublishAgentTranscript

func (s *AgentSession) PublishAgentTranscript(text string)

PublishAgentTranscript publishes the agent's LLM response to the Playground. Sends both a ChatMessage (chat panel) and a Transcription packet (transcript overlay).

func (*AgentSession) PublishUserTranscript

func (s *AgentSession) PublishUserTranscript(text string)

PublishUserTranscript publishes the user's STT transcript to the Playground. Sends both a ChatMessage (chat panel) and a Transcription packet (transcript overlay).

func (*AgentSession) Resume added in v0.0.5

func (s *AgentSession) Resume() error

func (*AgentSession) Say added in v0.0.5

func (s *AgentSession) Say(text string, allowInterruptions bool) (*SpeechHandle, error)

func (*AgentSession) SetAgentTrackSID

func (s *AgentSession) SetAgentTrackSID(sid string)

func (*AgentSession) SetAudioOutput added in v0.0.5

func (s *AgentSession) SetAudioOutput(out AudioOutput)

func (*AgentSession) SetRemoteTrackSID

func (s *AgentSession) SetRemoteTrackSID(sid string)

func (*AgentSession) SetRemoteUserIdentity

func (s *AgentSession) SetRemoteUserIdentity(identity string)

func (*AgentSession) SetRoom added in v0.0.5

func (s *AgentSession) SetRoom(room *lksdk.Room)

SetRoom wires the LiveKit room to the session after connection. This initialises the ClientEventsDispatcher (RPC handlers, state broadcasting) so the Playground can discover the agent's audio track and state.

func (*AgentSession) SetVideoOutput added in v0.0.5

func (s *AgentSession) SetVideoOutput(out VideoOutput)

func (*AgentSession) Start

func (s *AgentSession) Start(ctx context.Context) error

func (*AgentSession) Stop

func (s *AgentSession) Stop(ctx context.Context) error

func (*AgentSession) TimelineSnapshot added in v0.0.5

func (s *AgentSession) TimelineSnapshot() []*AgentEvent

func (*AgentSession) UpdateAgent added in v0.0.5

func (s *AgentSession) UpdateAgent(agent AgentInterface, opts *UpdateAgentOpts) error

func (*AgentSession) UpdateAgentState

func (s *AgentSession) UpdateAgentState(state AgentState)

func (*AgentSession) UpdateOptions added in v0.0.5

func (s *AgentSession) UpdateOptions(opts AgentSessionOptions)

func (*AgentSession) UpdateUserState

func (s *AgentSession) UpdateUserState(state UserState)

type AgentSessionOptions

type AgentSessionOptions struct {
	AllowInterruptions            bool
	DiscardAudioIfUninterruptible bool
	MinInterruptionDuration       float64
	MinInterruptionWords          int
	MinEndpointingDelay           float64
	MaxEndpointingDelay           float64
	MaxToolSteps                  int
	UserAwayTimeout               float64
	FalseInterruptionTimeout      float64
	ResumeFalseInterruption       bool
	MinConsecutiveSpeechDelay     float64
	UseTTSAlignedTranscript       bool
	PreemptiveGeneration          bool
	AECWarmupDuration             float64
	SpeakingRate                  float64
	TranscriptRefreshRate         time.Duration
	LinkedParticipant             lksdk.Participant
	IVRDetection                  bool
}

type AgentState

type AgentState string
const (
	AgentStateInitializing AgentState = "initializing"
	AgentStateIdle         AgentState = "idle"
	AgentStateListening    AgentState = "listening"
	AgentStateThinking     AgentState = "thinking"
	AgentStateSpeaking     AgentState = "speaking"
)

type AgentStateChangedEvent

type AgentStateChangedEvent struct {
	OldState  AgentState `json:"old_state"`
	NewState  AgentState `json:"new_state"`
	CreatedAt time.Time  `json:"created_at"`
}

func (*AgentStateChangedEvent) GetType added in v0.0.5

func (e *AgentStateChangedEvent) GetType() string

type AgentTask

type AgentTask[T any] struct {
	Agent
	Result chan T
	Err    chan error
}

AgentTask represents a sub-agent execution that returns a result.

func NewAgentTask

func NewAgentTask[T any](instructions string) *AgentTask[T]

func (*AgentTask[T]) Complete

func (t *AgentTask[T]) Complete(result T)

func (*AgentTask[T]) Fail

func (t *AgentTask[T]) Fail(err error)

func (*AgentTask[T]) WaitAny

func (t *AgentTask[T]) WaitAny(ctx context.Context) (any, error)

type AudioConfig

type AudioConfig struct {
	Source      AudioSource
	Volume      float64
	Probability float64
}

type AudioInput added in v0.0.5

type AudioInput interface {
	Label() string
	Stream() <-chan *model.AudioFrame
	OnAttached()
	OnDetached()
}

AudioInput represents a source of audio frames (e.g., mic or remote track).

type AudioOutput added in v0.0.5

type AudioOutput interface {
	Label() string
	CaptureFrame(frame *model.AudioFrame) error
	Flush()
	WaitForPlayout(ctx context.Context) error
	ClearBuffer()
	OnAttached()
	OnDetached()
	Pause()
	Resume()
	OnPlaybackStarted(func(ev PlaybackStartedEvent))
	OnPlaybackFinished(func(ev PlaybackFinishedEvent))
}

AudioOutput represents a destination for audio frames (e.g., speakers or remote track).

type AudioReceiver added in v0.0.5

type AudioReceiver interface {
	Start(ctx context.Context) error
	Stream() <-chan *model.AudioFrame
	NotifyPlaybackFinished(playbackPosition time.Duration, interrupted bool) error
	Close() error
}

type AudioRecognition

type AudioRecognition struct {
	// contains filtered or unexported fields
}

func NewAudioRecognition

func NewAudioRecognition(session *AgentSession, hooks RecognitionHooks, s stt.STT, v vad.VAD) *AudioRecognition

func (*AudioRecognition) Close added in v0.0.5

func (ar *AudioRecognition) Close()

func (*AudioRecognition) Flush added in v0.0.5

func (ar *AudioRecognition) Flush() error

func (*AudioRecognition) PushAudio

func (ar *AudioRecognition) PushAudio(frame *model.AudioFrame) error

func (*AudioRecognition) Start

func (ar *AudioRecognition) Start(ctx context.Context) error

type AudioSource

type AudioSource interface{} // Can be string, BuiltinAudioClip, or <-chan *model.AudioFrame

type AvatarIO

type AvatarIO interface {
	SendAvatarData(ctx context.Context, data []byte) error
}

AvatarIO defines how Avatar commands/data are sent.

type AvatarOptions added in v0.0.5

type AvatarOptions struct {
	VideoWidth      int
	VideoHeight     int
	VideoFPS        float64
	AudioSampleRate int
	AudioChannels   int
}

type AvatarRunner

type AvatarRunner struct {
	// contains filtered or unexported fields
}

AvatarRunner coordinates Avatar IO and LipSync events.

func NewAvatarRunner

func NewAvatarRunner(room *lksdk.Room, audioRecv AudioReceiver, videoGen VideoGenerator, opts AvatarOptions, avSync AVSynchronizer, lazyPublish bool) *AvatarRunner

func (*AvatarRunner) SendLipSyncEvent added in v0.0.5

func (r *AvatarRunner) SendLipSyncEvent(ctx context.Context, data []byte) error

func (*AvatarRunner) Start

func (r *AvatarRunner) Start(ctx context.Context) error

func (*AvatarRunner) Stop

func (r *AvatarRunner) Stop()

type BackgroundAudioPlayer

type BackgroundAudioPlayer struct {
	// contains filtered or unexported fields
}

func NewBackgroundAudioPlayer

func NewBackgroundAudioPlayer(ambientSound, thinkingSound interface{}) *BackgroundAudioPlayer

func (*BackgroundAudioPlayer) AgentStateChanged

func (p *BackgroundAudioPlayer) AgentStateChanged(newState AgentState)

func (*BackgroundAudioPlayer) Close

func (p *BackgroundAudioPlayer) Close() error

func (*BackgroundAudioPlayer) Play

func (p *BackgroundAudioPlayer) Play(audio interface{}, loop bool) *PlayHandle

func (*BackgroundAudioPlayer) Start

func (p *BackgroundAudioPlayer) Start(room *lksdk.Room, agentSession *AgentSession) error

type BuiltinAudioClip

type BuiltinAudioClip string
const (
	CityAmbience    BuiltinAudioClip = "city-ambience.ogg"
	ForestAmbience  BuiltinAudioClip = "forest-ambience.ogg"
	OfficeAmbience  BuiltinAudioClip = "office-ambience.ogg"
	CrowdedRoom     BuiltinAudioClip = "crowded-room.ogg"
	KeyboardTyping  BuiltinAudioClip = "keyboard-typing.ogg"
	KeyboardTyping2 BuiltinAudioClip = "keyboard-typing2.ogg"
	HoldMusic       BuiltinAudioClip = "hold_music.ogg"
)

func (BuiltinAudioClip) Path

func (b BuiltinAudioClip) Path() string

type ChatMessageRunEvent added in v0.0.5

type ChatMessageRunEvent struct {
	Item *llm.ChatMessage
}

func (*ChatMessageRunEvent) GetCreatedAt added in v0.0.5

func (e *ChatMessageRunEvent) GetCreatedAt() time.Time

func (*ChatMessageRunEvent) GetItem added in v0.0.5

func (e *ChatMessageRunEvent) GetItem() llm.ChatItem

func (*ChatMessageRunEvent) RunEventType added in v0.0.5

func (e *ChatMessageRunEvent) RunEventType() string

type ClientEventPayload

type ClientEventPayload struct {
	Type  string `json:"type"`
	State string `json:"state,omitempty"`
}

type ClientEventsDispatcher

type ClientEventsDispatcher struct {
	// contains filtered or unexported fields
}

ClientEventsDispatcher manages sending Agent states to the LiveKit Room DataChannel and handling inbound RPC and DataChannel requests.

func NewClientEventsDispatcher

func NewClientEventsDispatcher(room *lksdk.Room, session *AgentSession) *ClientEventsDispatcher

func (*ClientEventsDispatcher) Close added in v0.0.5

func (d *ClientEventsDispatcher) Close()

func (*ClientEventsDispatcher) DispatchAgentState

func (d *ClientEventsDispatcher) DispatchAgentState(state AgentState)

DispatchAgentState emits AgentStateIdle, AgentStateThinking, or AgentStateSpeaking.

func (*ClientEventsDispatcher) DispatchUserState

func (d *ClientEventsDispatcher) DispatchUserState(state UserState)

DispatchUserState emits UserStateListening or UserStateSpeaking.

func (*ClientEventsDispatcher) RegisterTextInput added in v0.0.5

func (d *ClientEventsDispatcher) RegisterTextInput(cb TextInputCallback)

type CloseEvent

type CloseEvent struct {
	Reason    CloseReason `json:"reason"`
	Error     error       `json:"error,omitempty"`
	CreatedAt time.Time   `json:"created_at"`
}

func (*CloseEvent) GetType

func (e *CloseEvent) GetType() string

type CloseReason

type CloseReason string
const (
	CloseReasonError                   CloseReason = "error"
	CloseReasonJobShutdown             CloseReason = "job_shutdown"
	CloseReasonParticipantDisconnected CloseReason = "participant_disconnected"
	CloseReasonUserInitiated           CloseReason = "user_initiated"
	CloseReasonTaskCompleted           CloseReason = "task_completed"
)

type CommitUserTurnOpts added in v0.0.5

type CommitUserTurnOpts struct {
	AudioDetached     bool
	TranscriptTimeout time.Duration
	STTFlushDuration  time.Duration
	SkipReply         bool
}

type ConversationItemAddedEvent

type ConversationItemAddedEvent struct {
	Item      llm.ChatItem `json:"item"`
	CreatedAt time.Time    `json:"created_at"`
}

func (*ConversationItemAddedEvent) GetType

func (e *ConversationItemAddedEvent) GetType() string

type DataStreamIO

type DataStreamIO struct {
	// contains filtered or unexported fields
}

func NewDataStreamIO

func NewDataStreamIO(room *lksdk.Room) *DataStreamIO

func (*DataStreamIO) SendAvatarData

func (io *DataStreamIO) SendAvatarData(ctx context.Context, data []byte) error

type DtmfEvent

type DtmfEvent struct {
	Digit string
	Time  time.Time
}

type EndOfTurnInfo

type EndOfTurnInfo struct {
	SkipReply            bool
	TranscriptTimeout    time.Duration
	STTFlushDuration     time.Duration
	NewTranscript        string
	TranscriptConfidence float64
	StartedSpeakingAt    *float64
	StoppedSpeakingAt    *float64
}

type ErrorEvent added in v0.0.5

type ErrorEvent struct {
	Error     error     `json:"error"`
	Source    any       `json:"source,omitempty"`
	CreatedAt time.Time `json:"created_at"`
}

func (*ErrorEvent) GetType added in v0.0.5

func (e *ErrorEvent) GetType() string

type EvaluationResult

type EvaluationResult struct {
	Judgments map[string]string
}

type Event

type Event interface {
	GetType() string
}

type EventTimeline added in v0.0.5

type EventTimeline struct {
	OnEvent func(ev *AgentEvent)
	// contains filtered or unexported fields
}

func NewEventTimeline added in v0.0.5

func NewEventTimeline() *EventTimeline

func (*EventTimeline) AddEvent added in v0.0.5

func (t *EventTimeline) AddEvent(ev Event)

func (*EventTimeline) Clear added in v0.0.5

func (t *EventTimeline) Clear()

Clear releases all stored events and the OnEvent callback so the timeline (and everything it references) can be garbage-collected.

func (*EventTimeline) Snapshot added in v0.0.5

func (t *EventTimeline) Snapshot() []*AgentEvent

type FunctionCallOutputRunEvent added in v0.0.5

type FunctionCallOutputRunEvent struct {
	Item *llm.FunctionCallOutput
}

func (*FunctionCallOutputRunEvent) GetCreatedAt added in v0.0.5

func (e *FunctionCallOutputRunEvent) GetCreatedAt() time.Time

func (*FunctionCallOutputRunEvent) GetItem added in v0.0.5

func (e *FunctionCallOutputRunEvent) GetItem() llm.ChatItem

func (*FunctionCallOutputRunEvent) RunEventType added in v0.0.5

func (e *FunctionCallOutputRunEvent) RunEventType() string

type FunctionCallRunEvent added in v0.0.5

type FunctionCallRunEvent struct {
	Item *llm.FunctionCall
}

func (*FunctionCallRunEvent) GetCreatedAt added in v0.0.5

func (e *FunctionCallRunEvent) GetCreatedAt() time.Time

func (*FunctionCallRunEvent) GetItem added in v0.0.5

func (e *FunctionCallRunEvent) GetItem() llm.ChatItem

func (*FunctionCallRunEvent) RunEventType added in v0.0.5

func (e *FunctionCallRunEvent) RunEventType() string

type FunctionToolsExecutedEvent added in v0.0.5

type FunctionToolsExecutedEvent struct {
	FunctionCalls       []llm.FunctionCall        `json:"function_calls"`
	FunctionCallOutputs []*llm.FunctionCallOutput `json:"function_call_outputs"`
	CreatedAt           time.Time                 `json:"created_at"`
	HasToolReply        bool                      `json:"has_tool_reply"`
	HasAgentHandoff     bool                      `json:"has_agent_handoff"`
}

func (*FunctionToolsExecutedEvent) GetType added in v0.0.5

func (e *FunctionToolsExecutedEvent) GetType() string

type GenerateReplyOpts added in v0.0.5

type GenerateReplyOpts struct {
	AllowInterruptions bool
}

type GetAgentInfoResponse added in v0.0.5

type GetAgentInfoResponse struct {
	ID           string         `json:"id"`
	Instructions string         `json:"instructions,omitempty"`
	Tools        []string       `json:"tools"`
	ChatCtx      []llm.ChatItem `json:"chat_ctx"`
}

type GetChatHistoryResponse added in v0.0.5

type GetChatHistoryResponse struct {
	Items []llm.ChatItem `json:"items"`
}

type GetSessionStateResponse added in v0.0.5

type GetSessionStateResponse struct {
	AgentState string         `json:"agent_state"`
	UserState  string         `json:"user_state"`
	AgentID    string         `json:"agent_id"`
	Options    map[string]any `json:"options"`
	CreatedAt  float64        `json:"created_at"`
}

type IVRActivity

type IVRActivity struct {
	AgentIntf AgentInterface
	Agent     *Agent
	// contains filtered or unexported fields
}

func NewIVRActivity

func NewIVRActivity(agentIntf AgentInterface) *IVRActivity

func (*IVRActivity) OnDtmf

func (i *IVRActivity) OnDtmf(digit string)

func (*IVRActivity) SetDigitCallback

func (i *IVRActivity) SetDigitCallback(timeout time.Duration, cb func(buffer string) (bool, error))

func (*IVRActivity) Start

func (i *IVRActivity) Start()

func (*IVRActivity) Stop

func (i *IVRActivity) Stop()

type InputDetails

type InputDetails struct {
	Modality string
}

func DefaultInputDetails

func DefaultInputDetails() InputDetails

type JobResult added in v0.0.5

type JobResult interface {
	Wait(ctx context.Context) error
	GetEvents() []RunEvent
}

type LLMGenerationData

type LLMGenerationData struct {
	TextCh        chan string
	FunctionCh    chan *llm.FunctionToolCall
	FullTextCh    chan string // receives the complete assembled text when streaming is done
	GeneratedText string
	Usage         *llm.CompletionUsage
}

func PerformLLMInference

func PerformLLMInference(ctx context.Context, l llm.LLM, chatCtx *llm.ChatContext, tools []interface{}) (*LLMGenerationData, error)

type LLMNodeFunc added in v0.0.5

type LLMNodeFunc func(ctx context.Context, l llm.LLM, chatCtx *llm.ChatContext, tools []interface{}) (*LLMGenerationData, error)

type LLMTurnDetector

type LLMTurnDetector struct {
	// contains filtered or unexported fields
}

LLMTurnDetector uses an LLM to predict if the user has finished speaking. It sends the recent conversation history to the LLM and asks for a probability score.

func NewLLMTurnDetector

func NewLLMTurnDetector(llmInstance llm.LLM) *LLMTurnDetector

func (*LLMTurnDetector) PredictEndOfTurn

func (m *LLMTurnDetector) PredictEndOfTurn(ctx context.Context, chatCtx *llm.ChatContext) (float64, error)

type MediaPublisher added in v0.0.5

type MediaPublisher interface {
	Identity() string
	PublishData(data []byte, topic string, destinationSIDs []string) error
	SetAttributes(attrs map[string]string) error
}

type MetricsCollectedEvent

type MetricsCollectedEvent struct {
	Metrics   telemetry.AgentMetrics `json:"metrics"`
	CreatedAt time.Time              `json:"created_at"`
}

func (*MetricsCollectedEvent) GetType

func (e *MetricsCollectedEvent) GetType() string

type MultimodalAgent

type MultimodalAgent struct {
	PublishAudio func(frame *model.AudioFrame) error
	// contains filtered or unexported fields
}

func NewMultimodalAgent

func NewMultimodalAgent(
	m llm.RealtimeModel,
	chatCtx *llm.ChatContext,
) *MultimodalAgent

func (*MultimodalAgent) OnAudioFrame

func (ma *MultimodalAgent) OnAudioFrame(ctx context.Context, frame *model.AudioFrame)

func (*MultimodalAgent) Start

func (ma *MultimodalAgent) Start(ctx context.Context, s *AgentSession) error

type ParticipantActiveEvent added in v0.0.5

type ParticipantActiveEvent struct {
	ParticipantID string    `json:"participant_id"`
	Identity      string    `json:"identity"`
	Active        bool      `json:"active"`
	CreatedAt     time.Time `json:"created_at"`
}

func (*ParticipantActiveEvent) GetType added in v0.0.5

func (e *ParticipantActiveEvent) GetType() string

type ParticipantReference added in v0.0.5

type ParticipantReference interface {
	Identity() string
	SID() string
}

type PipelineAgent

type PipelineAgent struct {
	LLM llm.LLM
	// contains filtered or unexported fields
}

func NewPipelineAgent

func NewPipelineAgent(
	vad vad.VAD,
	sttInstance stt.STT,
	llmObj llm.LLM,
	ttsInstance tts.TTS,
	chatCtx *llm.ChatContext,
) *PipelineAgent

func (*PipelineAgent) GenerateReply added in v0.0.5

func (va *PipelineAgent) GenerateReply(speech *SpeechHandle)

func (*PipelineAgent) OnAudioFrame

func (va *PipelineAgent) OnAudioFrame(ctx context.Context, frame *model.AudioFrame)

func (*PipelineAgent) Start

func (va *PipelineAgent) Start(ctx context.Context, s *AgentSession) error

func (*PipelineAgent) Stop added in v0.0.5

func (va *PipelineAgent) Stop()

type PlayHandle

type PlayHandle struct {
	// contains filtered or unexported fields
}

func (*PlayHandle) Done

func (h *PlayHandle) Done() bool

func (*PlayHandle) Stop

func (h *PlayHandle) Stop()

func (*PlayHandle) WaitForPlayout

func (h *PlayHandle) WaitForPlayout()

type PlaybackFinishedEvent added in v0.0.5

type PlaybackFinishedEvent struct {
	PlaybackPosition       time.Duration
	Interrupted            bool
	SynchronizedTranscript string
}

type PlaybackStartedEvent added in v0.0.5

type PlaybackStartedEvent struct {
	CreatedAt time.Time
}

type Plugin

type Plugin interface {
	Title() string
	Version() string
	Package() string
	DownloadFiles() error
}

func RegisteredPlugins

func RegisteredPlugins() []Plugin

type QueueIO

type QueueIO struct {
	// contains filtered or unexported fields
}

func NewQueueIO

func NewQueueIO() *QueueIO

func (*QueueIO) ReadQueue

func (io *QueueIO) ReadQueue() <-chan []byte

func (*QueueIO) SendAvatarData

func (io *QueueIO) SendAvatarData(ctx context.Context, data []byte) error

type RealtimeAudioOutputNodeFunc added in v0.0.5

type RealtimeAudioOutputNodeFunc func(ctx context.Context, audio <-chan *model.AudioFrame) (<-chan *model.AudioFrame, error)

type RecognitionHooks

type RecognitionHooks interface {
	OnStartOfSpeech(ev *vad.VADEvent)
	OnEndOfSpeech(ev *vad.VADEvent)
	OnInterimTranscript(ev *stt.SpeechEvent)
	OnFinalTranscript(ev *stt.SpeechEvent)
}

type RecordingOptions

type RecordingOptions struct {
	Audio      bool `json:"audio"`
	Traces     bool `json:"traces"`
	Logs       bool `json:"logs"`
	Transcript bool `json:"transcript"`
}

type RunAssert

type RunAssert struct {
	ChatCtx *llm.ChatContext
	// contains filtered or unexported fields
}

func (*RunAssert) ContainsMessage

func (a *RunAssert) ContainsMessage(role llm.ChatRole, content string) *RunAssert

func (*RunAssert) HasError

func (a *RunAssert) HasError() error

func (*RunAssert) IsFunctionCall

func (a *RunAssert) IsFunctionCall(name string) *RunAssert

func (*RunAssert) Judge

func (a *RunAssert) Judge(ctx context.Context, evaluator evals.Evaluator, llmInstance llm.LLM) (*RunAssert, error)

type RunContext

type RunContext struct {
	Session      *AgentSession
	SpeechHandle *SpeechHandle
	FunctionCall *llm.FunctionCall
}

func GetRunContext

func GetRunContext(ctx context.Context) *RunContext

func (*RunContext) WaitForPlayout

func (r *RunContext) WaitForPlayout(ctx context.Context) error

type RunEvent added in v0.0.5

type RunEvent interface {
	RunEventType() string
	GetCreatedAt() time.Time
	GetItem() llm.ChatItem
}

type RunResult

// RunResult is a generic result container whose FinalOutput field has the
// type parameter T. Instances are created with NewRunResult and are also
// returned by GenerateTypedReply.
type RunResult[T any] struct {
	// NOTE(review): presumably the chatCtx passed to NewRunResult — confirm.
	ChatCtx *llm.ChatContext
	// NOTE(review): units/epoch of Timestamp are not visible here — confirm.
	Timestamp float64
	Expect    *RunAssert

	// FinalOutput has the run's type parameter as its type.
	// NOTE(review): presumably only meaningful once the run has completed
	// (see Done/Wait/GetOutput) — confirm.
	FinalOutput T

	// Events holds RunEvent values; see also the AddEvent and GetEvents methods.
	Events []RunEvent
	// contains filtered or unexported fields
}

func GenerateTypedReply added in v0.0.5

func GenerateTypedReply[T any](ctx context.Context, s *AgentSession, userInput string, opts *GenerateReplyOpts) (*RunResult[T], error)

func NewRunResult

func NewRunResult[T any](chatCtx *llm.ChatContext) *RunResult[T]

func (*RunResult[T]) AddEvent added in v0.0.5

func (r *RunResult[T]) AddEvent(ev RunEvent)

func (*RunResult[T]) Done added in v0.0.5

func (r *RunResult[T]) Done() <-chan struct{}

func (*RunResult[T]) Eval added in v0.0.5

func (r *RunResult[T]) Eval(ctx context.Context, evaluator evals.Evaluator, llmInstance llm.LLM) (*evals.JudgmentResult, error)

func (*RunResult[T]) GetEvents added in v0.0.5

func (r *RunResult[T]) GetEvents() []RunEvent

func (*RunResult[T]) Wait added in v0.0.5

func (r *RunResult[T]) Wait(ctx context.Context) error

func (*RunResult[T]) WaitAny added in v0.0.5

func (r *RunResult[T]) WaitAny(ctx context.Context) (T, error)

func (*RunResult[T]) WatchHandle added in v0.0.5

func (r *RunResult[T]) WatchHandle(ctx context.Context, handle *SpeechHandle)

func (*RunResult[T]) WatchTask added in v0.0.5

func (r *RunResult[T]) WatchTask(done <-chan struct{})

type RunResultInterface added in v0.0.5

// RunResultInterface is a type-parameter-free interface containing two of the
// methods declared on *RunResult[T] (AddEvent and WatchTask), so *RunResult[T]
// satisfies it for every T. It is the type of SpeechHandle.RunResult.
type RunResultInterface interface {
	AddEvent(ev RunEvent)
	WatchTask(done <-chan struct{})
}

type STTNodeFunc added in v0.0.5

// STTNodeFunc is a function type that receives an stt.STT and a receive-only
// channel of audio frames and returns a receive-only channel of speech events,
// or an error. It is the type of the Agent.STTNode field.
type STTNodeFunc func(ctx context.Context, s stt.STT, audio <-chan *model.AudioFrame) (<-chan *stt.SpeechEvent, error)

type SendMessageRequest added in v0.0.5

// SendMessageRequest is a JSON payload with a single string field, encoded
// under the key "text".
type SendMessageRequest struct {
	Text string `json:"text"`
}

type SendMessageResponse added in v0.0.5

// SendMessageResponse is a JSON payload carrying a slice of chat items under
// the key "items". The tag has no omitempty, so a nil slice encodes as null.
type SendMessageResponse struct {
	Items []llm.ChatItem `json:"items"`
}

type SessionInfo added in v0.0.5

// SessionInfo is a single-method interface exposing a participant identifier.
type SessionInfo interface {
	// LocalParticipantID returns the local participant's ID as a string.
	LocalParticipantID() string
}

type SessionReport

// SessionReport is the JSON-serializable report of a session; it is created
// with NewSessionReport and accepted by UploadSessionReport.
//
// Fields tagged omitempty (Timeline and the pointer fields) are left out of
// the JSON output when nil/empty; the remaining fields are always emitted.
type SessionReport struct {
	RecordingOptions        RecordingOptions    `json:"recording_options"`
	JobID                   string              `json:"job_id"`
	RoomID                  string              `json:"room_id"`
	Room                    string              `json:"room"`
	Options                 AgentSessionOptions `json:"options"`
	// Events is untyped ([]any); see the AddEvent method.
	Events                  []any               `json:"events"`
	// Timeline is typed ([]*AgentEvent); see the SetTimeline method.
	Timeline                []*AgentEvent       `json:"timeline,omitempty"`
	// See the SetChatHistory method.
	ChatHistory             *llm.ChatContext    `json:"chat_history"`
	AudioRecordingPath      *string             `json:"audio_recording_path,omitempty"`
	// NOTE(review): units/epoch of the float64 time fields below are not
	// visible here (likely seconds) — confirm before consuming them.
	AudioRecordingStartedAt *float64            `json:"audio_recording_started_at,omitempty"`
	Duration                *float64            `json:"duration,omitempty"`
	StartedAt               *float64            `json:"started_at,omitempty"`
	Timestamp               float64             `json:"timestamp"`
	// contains filtered or unexported fields
}

func NewSessionReport

func NewSessionReport() *SessionReport

func (*SessionReport) AddEvent added in v0.0.5

func (r *SessionReport) AddEvent(event any)

func (*SessionReport) SetChatHistory added in v0.0.5

func (r *SessionReport) SetChatHistory(chatCtx *llm.ChatContext)

func (*SessionReport) SetTimeline added in v0.0.5

func (r *SessionReport) SetTimeline(events []*AgentEvent)

func (*SessionReport) ToDict added in v0.0.5

func (r *SessionReport) ToDict() map[string]any

type SpeechCreatedEvent

// SpeechCreatedEvent is a JSON-serializable event that references a
// SpeechHandle. *SpeechCreatedEvent also has a GetType method returning a string.
type SpeechCreatedEvent struct {
	UserInitiated bool          `json:"user_initiated"`
	Source        string        `json:"source"`
	// SpeechHandle is excluded from JSON encoding (tag "-").
	SpeechHandle  *SpeechHandle `json:"-"`
	// ParticipantID is omitted from JSON when empty.
	ParticipantID string        `json:"participant_id,omitempty"`
	CreatedAt     time.Time     `json:"created_at"`
}

func (*SpeechCreatedEvent) GetType

func (e *SpeechCreatedEvent) GetType() string

type SpeechHandle

// SpeechHandle is created with NewSpeechHandle, which takes an
// allow-interruptions flag and InputDetails. Its methods expose state queries
// (IsDone, IsInterrupted, IsScheduled), state transitions (MarkScheduled,
// MarkDone, MarkDoneWithError, Interrupt) and a context-aware Wait.
type SpeechHandle struct {
	ID                 string
	AllowInterruptions bool
	InputDetails       InputDetails
	// NOTE(review): presumably one of the SpeechPriorityLow/Normal/High
	// constants (0/5/10) declared in this package — confirm.
	Priority           int
	CreatedAt          time.Time

	// FinalOutput is untyped (any), unlike RunResult[T].FinalOutput.
	FinalOutput any
	ManualText  string

	// OnItemAdded is an optional callback field taking an llm.ChatItem.
	// NOTE(review): whether a nil callback is tolerated is not visible here.
	OnItemAdded func(item llm.ChatItem)
	// RunResult is held through the non-generic RunResultInterface, which
	// *RunResult[T] satisfies for any T.
	RunResult   RunResultInterface
	// contains filtered or unexported fields
}

func NewSpeechHandle

func NewSpeechHandle(allowInterruptions bool, inputDetails InputDetails) *SpeechHandle

func (*SpeechHandle) Error added in v0.0.5

func (s *SpeechHandle) Error() error

func (*SpeechHandle) Interrupt

func (s *SpeechHandle) Interrupt(force bool) error

func (*SpeechHandle) IsDone

func (s *SpeechHandle) IsDone() bool

func (*SpeechHandle) IsInterrupted

func (s *SpeechHandle) IsInterrupted() bool

func (*SpeechHandle) IsScheduled

func (s *SpeechHandle) IsScheduled() bool

func (*SpeechHandle) MarkDone

func (s *SpeechHandle) MarkDone()

func (*SpeechHandle) MarkDoneWithError added in v0.0.5

func (s *SpeechHandle) MarkDoneWithError(err error)

func (*SpeechHandle) MarkScheduled

func (s *SpeechHandle) MarkScheduled()

func (*SpeechHandle) Wait

func (s *SpeechHandle) Wait(ctx context.Context) error

type StreamRequest added in v0.0.5

// StreamRequest is a JSON envelope with three string fields: "id", "method"
// and "payload".
// NOTE(review): presumably Payload is itself an encoded document (e.g. JSON)
// and ID is echoed back in StreamResponse.ID — confirm against the handler.
type StreamRequest struct {
	ID      string `json:"id"`
	Method  string `json:"method"`
	Payload string `json:"payload"`
}

type StreamResponse added in v0.0.5

// StreamResponse is a JSON envelope with an "id" plus optional "payload" and
// "error" strings; the latter two are omitted from the output when empty.
// NOTE(review): presumably exactly one of Payload/Error is set per response —
// confirm against the handler.
type StreamResponse struct {
	ID      string `json:"id"`
	Payload string `json:"payload,omitempty"`
	Error   string `json:"error,omitempty"`
}

type SyncEvent added in v0.0.5

// SyncEvent is the element type of the channel returned by
// TranscriptSynchronizer.EventCh. It carries a text fragment, a flush flag and
// a segment identifier.
type SyncEvent struct {
	Text      string
	// NOTE(review): presumably marks the end of a segment — confirm.
	Flush     bool
	SegmentID string
}

type SyncedAudioOutput added in v0.0.5

type SyncedAudioOutput struct {
	// contains filtered or unexported fields
}

SyncedAudioOutput wraps an AudioOutput and pushes frames to the synchronizer.

func NewSyncedAudioOutput added in v0.0.5

func NewSyncedAudioOutput(sync *TranscriptSynchronizer, next AudioOutput) *SyncedAudioOutput

func (*SyncedAudioOutput) CaptureFrame added in v0.0.5

func (s *SyncedAudioOutput) CaptureFrame(frame *model.AudioFrame) error

func (*SyncedAudioOutput) ClearBuffer added in v0.0.5

func (s *SyncedAudioOutput) ClearBuffer()

func (*SyncedAudioOutput) Flush added in v0.0.5

func (s *SyncedAudioOutput) Flush()

func (*SyncedAudioOutput) Label added in v0.0.5

func (s *SyncedAudioOutput) Label() string

func (*SyncedAudioOutput) OnAttached added in v0.0.5

func (s *SyncedAudioOutput) OnAttached()

func (*SyncedAudioOutput) OnDetached added in v0.0.5

func (s *SyncedAudioOutput) OnDetached()

func (*SyncedAudioOutput) OnPlaybackFinished added in v0.0.5

func (s *SyncedAudioOutput) OnPlaybackFinished(f func(ev PlaybackFinishedEvent))

func (*SyncedAudioOutput) OnPlaybackStarted added in v0.0.5

func (s *SyncedAudioOutput) OnPlaybackStarted(f func(ev PlaybackStartedEvent))

func (*SyncedAudioOutput) Pause added in v0.0.5

func (s *SyncedAudioOutput) Pause()

func (*SyncedAudioOutput) Resume added in v0.0.5

func (s *SyncedAudioOutput) Resume()

func (*SyncedAudioOutput) WaitForPlayout added in v0.0.5

func (s *SyncedAudioOutput) WaitForPlayout(ctx context.Context) error

type SyncedTextOutput added in v0.0.5

type SyncedTextOutput struct {
	// contains filtered or unexported fields
}

SyncedTextOutput wraps a TextOutput and pushes text to the synchronizer.

func NewSyncedTextOutput added in v0.0.5

func NewSyncedTextOutput(sync *TranscriptSynchronizer, next TextOutput) *SyncedTextOutput

func (*SyncedTextOutput) CaptureText added in v0.0.5

func (s *SyncedTextOutput) CaptureText(text string) error

func (*SyncedTextOutput) Flush added in v0.0.5

func (s *SyncedTextOutput) Flush()

func (*SyncedTextOutput) Label added in v0.0.5

func (s *SyncedTextOutput) Label() string

func (*SyncedTextOutput) OnAttached added in v0.0.5

func (s *SyncedTextOutput) OnAttached()

func (*SyncedTextOutput) OnDetached added in v0.0.5

func (s *SyncedTextOutput) OnDetached()

func (*SyncedTextOutput) SetSegmentID added in v0.0.5

func (s *SyncedTextOutput) SetSegmentID(id string)

type TTSGenerationData

// TTSGenerationData is the result type of PerformTTSInference and of
// TTSNodeFunc. It exposes two bidirectional channels and a duration.
type TTSGenerationData struct {
	// AudioCh carries audio frames.
	AudioCh       chan *model.AudioFrame
	// AlignedTextCh carries strings.
	// NOTE(review): presumably text aligned to the generated audio — confirm.
	AlignedTextCh chan string
	// NOTE(review): presumably "time to first byte" of the TTS response —
	// confirm when it is populated relative to channel consumption.
	TTFB          time.Duration
}

func PerformTTSInference

func PerformTTSInference(ctx context.Context, t tts.TTS, textCh <-chan string) (*TTSGenerationData, error)

type TTSNodeFunc added in v0.0.5

// TTSNodeFunc is a function type with the same signature as
// PerformTTSInference: it takes a tts.TTS and a receive-only channel of text
// and returns *TTSGenerationData or an error. It is the type of the
// Agent.TTSNode field.
type TTSNodeFunc func(ctx context.Context, t tts.TTS, textCh <-chan string) (*TTSGenerationData, error)

type Tagger

// Tagger is an opaque type created with NewTagger. Its exported API manages a
// set of string tags (Add, Remove, Tags), records an outcome with a reason
// (Success, Fail, OutcomeReason) and accepts an *EvaluationResult (Evaluation).
// NOTE(review): whether Tagger is safe for concurrent use is not visible here.
type Tagger struct {
	// contains filtered or unexported fields
}

func NewTagger

func NewTagger() *Tagger

func (*Tagger) Add

func (t *Tagger) Add(tag string)

func (*Tagger) Evaluation

func (t *Tagger) Evaluation(result *EvaluationResult)

func (*Tagger) Fail

func (t *Tagger) Fail(reason string)

func (*Tagger) OutcomeReason

func (t *Tagger) OutcomeReason() string

func (*Tagger) Remove

func (t *Tagger) Remove(tag string)

func (*Tagger) Success

func (t *Tagger) Success(reason string)

func (*Tagger) Tags

func (t *Tagger) Tags() []string

type TaskWaiter

// TaskWaiter is a single-method interface whose WaitAny returns an untyped
// (any) result. Note that (*RunResult[T]).WaitAny returns (T, error), so only
// *RunResult[any] has a method with exactly this signature.
type TaskWaiter interface {
	WaitAny(ctx context.Context) (any, error)
}

type TextInput added in v0.0.5

type TextInput interface {
	// Label returns a string naming this input.
	Label() string
	// OnAttached and OnDetached are lifecycle callbacks with no arguments or
	// results. NOTE(review): the exact attach/detach trigger is not visible here.
	OnAttached()
	OnDetached()
}

TextInput represents a source of text (e.g., chat messages or remote text tracks).

type TextInputCallback added in v0.0.5

// TextInputCallback is a function type taking the session and a
// TextInputEvent and returning an error. DefaultTextInputCallback has this
// exact signature.
type TextInputCallback func(s *AgentSession, ev TextInputEvent) error

type TextInputEvent added in v0.0.5

// TextInputEvent is the event value passed to a TextInputCallback.
type TextInputEvent struct {
	Text        string            `json:"text"`
	// Participant is excluded from JSON encoding (tag "-").
	Participant lksdk.Participant `json:"-"`
}

type TextOutput added in v0.0.5

// *SyncedTextOutput declares all six of these methods and takes a TextOutput
// as the `next` argument of its constructor.
type TextOutput interface {
	Label() string
	// CaptureText accepts one piece of text and may fail with an error.
	CaptureText(text string) error
	SetSegmentID(id string)
	Flush()
	// OnAttached and OnDetached are lifecycle callbacks with no arguments or
	// results.
	OnAttached()
	OnDetached()
}

TextOutput represents a destination for text (e.g., transcriptions).

type ToolExecutionOutput

// ToolExecutionOutput is the element type of the channel returned by
// PerformToolExecutions. It pairs a function call (held by value) with its
// output (held by pointer) and the raw result/error.
type ToolExecutionOutput struct {
	FncCall       llm.FunctionCall
	// NOTE(review): being a pointer, FncCallOut may be nil — confirm when.
	FncCallOut    *llm.FunctionCallOutput
	RawOutput     any
	RawError      error
	// NOTE(review): presumably signals that the LLM should produce a follow-up
	// reply after the tool result — confirm.
	ReplyRequired bool
	// NOTE(review): presumably set when the tool hands off to another agent or
	// task — confirm.
	AgentTask     AgentInterface
}

type TranscriptSynchronizer

type TranscriptSynchronizer struct {
	// contains filtered or unexported fields
}

TranscriptSynchronizer drip-feeds text to match the playout speed of audio.

func NewTranscriptSynchronizer

func NewTranscriptSynchronizer(speakingRate float64, refreshRate time.Duration) *TranscriptSynchronizer

NewTranscriptSynchronizer initializes the synchronizer. Default speaking rate is usually ~3.83 syllables/sec.

func (*TranscriptSynchronizer) Close

func (s *TranscriptSynchronizer) Close()

func (*TranscriptSynchronizer) EventCh

func (s *TranscriptSynchronizer) EventCh() <-chan SyncEvent

func (*TranscriptSynchronizer) Interrupt

func (s *TranscriptSynchronizer) Interrupt()

func (*TranscriptSynchronizer) PushAudio

func (s *TranscriptSynchronizer) PushAudio(frame *model.AudioFrame)

func (*TranscriptSynchronizer) PushText

func (s *TranscriptSynchronizer) PushText(text string)

func (*TranscriptSynchronizer) RotateSegment added in v0.0.5

func (s *TranscriptSynchronizer) RotateSegment()

RotateSegment flushes the remaining text buffer and resets the time accumulators for a new audio segment.

func (*TranscriptSynchronizer) SetSegmentID added in v0.0.5

func (s *TranscriptSynchronizer) SetSegmentID(id string)

type TranscriptionFilter

// TranscriptionFilter holds a single exported setting and is created with
// NewTranscriptionFilter.
type TranscriptionFilter struct {
	// NOTE(review): units are not stated here; NewTranscriptSynchronizer's doc
	// expresses speaking rate in syllables/sec — confirm this field matches.
	SpeakingRate float64
}

func NewTranscriptionFilter

func NewTranscriptionFilter() *TranscriptionFilter

type TranscriptionNodeFunc added in v0.0.5

// TranscriptionNodeFunc is a function type that takes a receive-only channel
// of strings and returns another receive-only channel of strings, or an error.
// It is the type of the Agent.TranscriptionNode field.
type TranscriptionNodeFunc func(ctx context.Context, textCh <-chan string) (<-chan string, error)

type TransitionActivityAction added in v0.0.5

// TransitionActivityAction is a string-backed enumeration; it is the type of
// both fields of UpdateAgentOpts.
type TransitionActivityAction string
// Defined TransitionActivityAction values.
const (
	TransitionActivityClose  TransitionActivityAction = "close"
	TransitionActivityPause  TransitionActivityAction = "pause"
	TransitionActivityStart  TransitionActivityAction = "start"
	TransitionActivityResume TransitionActivityAction = "resume"
)

type TurnDetectionMode

// TurnDetectionMode is a string-backed enumeration; it is the type of the
// Agent.TurnDetection field.
type TurnDetectionMode string
// Defined TurnDetectionMode values.
const (
	TurnDetectionModeSTT         TurnDetectionMode = "stt"
	TurnDetectionModeVAD         TurnDetectionMode = "vad"
	TurnDetectionModeRealtimeLLM TurnDetectionMode = "realtime_llm"
	TurnDetectionModeManual      TurnDetectionMode = "manual"
)

type TurnDetector

// TurnDetector is a single-method interface; it is the type of the
// Agent.TurnDetector field.
type TurnDetector interface {
	// PredictEndOfTurn takes a chat context and returns a float64 or an error.
	// NOTE(review): presumably a probability in [0, 1] that the user's turn
	// has ended — confirm the range against implementations.
	PredictEndOfTurn(ctx context.Context, chatCtx *llm.ChatContext) (float64, error)
}

type UpdateAgentOpts added in v0.0.5

// UpdateAgentOpts carries two TransitionActivityAction values, one for the
// previous activity and one for the new activity.
// NOTE(review): the zero value of each field is the empty string, which is not
// one of the declared constants — confirm how it is interpreted.
type UpdateAgentOpts struct {
	PreviousActivity TransitionActivityAction
	NewActivity      TransitionActivityAction
}

type UserInputTranscribedEvent

// UserInputTranscribedEvent is a JSON-serializable event carrying a transcript
// string and a flag distinguishing final from non-final transcripts.
// *UserInputTranscribedEvent also has a GetType method returning a string.
type UserInputTranscribedEvent struct {
	Transcript string    `json:"transcript"`
	IsFinal    bool      `json:"is_final"`
	// SpeakerID and Language are omitted from JSON when empty.
	SpeakerID  string    `json:"speaker_id,omitempty"`
	Language   string    `json:"language,omitempty"`
	CreatedAt  time.Time `json:"created_at"`
}

func (*UserInputTranscribedEvent) GetType

func (e *UserInputTranscribedEvent) GetType() string

type UserState

type UserState string

UserState is a string-typed discriminator; its defined values are the constants listed below.

// Defined UserState values.
const (
	UserStateSpeaking  UserState = "speaking"
	UserStateListening UserState = "listening"
	UserStateAway      UserState = "away"
)

type UserStateChangedEvent

// UserStateChangedEvent is a JSON-serializable event recording a transition
// from one UserState to another. *UserStateChangedEvent also has a GetType
// method returning a string.
type UserStateChangedEvent struct {
	OldState  UserState `json:"old_state"`
	NewState  UserState `json:"new_state"`
	CreatedAt time.Time `json:"created_at"`
}

func (*UserStateChangedEvent) GetType added in v0.0.5

func (e *UserStateChangedEvent) GetType() string

type VideoGenerator added in v0.0.5

// VideoGenerator accepts audio frames via PushAudio and exposes an untyped
// output stream. Consumers must type-switch on the values received from
// Stream, since the channel element type is interface{}.
type VideoGenerator interface {
	PushAudio(frame *model.AudioFrame) error
	Stream() <-chan interface{} // Yields *model.AudioFrame, *model.VideoFrame, or *model.AudioSegmentEnd
	ClearBuffer() error
	Close() error
}

type VideoInput added in v0.0.5

type VideoInput interface {
	// Label returns a string naming this input.
	Label() string
	// Stream returns a receive-only channel of *model.VideoFrame.
	Stream() <-chan *model.VideoFrame
	// OnAttached and OnDetached are lifecycle callbacks with no arguments or
	// results.
	OnAttached()
	OnDetached()
}

VideoInput represents a source of video frames (e.g., camera or remote track).

type VideoNodeFunc added in v0.0.5

// VideoNodeFunc is a function type that consumes a receive-only channel of
// video frames and returns only an error (it has no output channel). It is the
// type of the Agent.VideoNode field.
type VideoNodeFunc func(ctx context.Context, video <-chan *model.VideoFrame) error

type VideoOutput added in v0.0.5

type VideoOutput interface {
	// Label returns a string naming this output.
	Label() string
	// CaptureVideoFrame accepts one frame and may fail with an error.
	CaptureVideoFrame(frame *model.VideoFrame) error
	Flush()
	// OnAttached and OnDetached are lifecycle callbacks with no arguments or
	// results.
	OnAttached()
	OnDetached()
}

VideoOutput represents a destination for video frames (e.g., screen or remote track).

type VoiceActivityVideoSampler

type VoiceActivityVideoSampler struct {
	// contains filtered or unexported fields
}

VoiceActivityVideoSampler samples video frames at a reduced rate (e.g. 1 fps) only when the user is speaking, to reduce LLM context token usage.

func NewVoiceActivityVideoSampler

func NewVoiceActivityVideoSampler(session *AgentSession, sampleRate float64, opts images.EncodeOptions) *VoiceActivityVideoSampler

func (*VoiceActivityVideoSampler) OnVideoFrame

func (s *VoiceActivityVideoSampler) OnVideoFrame(ctx context.Context, frame *images.VideoFrame) bool

OnVideoFrame should be called for every incoming WebRTC video frame. It returns true if the frame should be forwarded to the LLM.

func (*VoiceActivityVideoSampler) SetSpeaking

func (s *VoiceActivityVideoSampler) SetSpeaking(speaking bool)

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL