Documentation
¶
Index ¶
- type Command
- type CommandMeta
- type CommandType
- type Config
- type Event
- type EventHandler
- type EventType
- type Session
- func (s *Session) Close(reason string)
- func (s *Session) ForwardTransferRequest(target string)
- func (s *Session) HandleCommand(cmd Command)
- func (s *Session) IsTTSPlaying() bool
- func (s *Session) ProcessAudio(ctx context.Context, data []byte) error
- func (s *Session) PushDTMF(digit string, end bool)
- func (s *Session) Start(ctx context.Context) error
- type StartMeta
- type TurnEvent
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type Command ¶
// Command is the JSON envelope for a single dialog-app command. Only
// "type" and "call_id" are always encoded; every other field carries
// omitempty and is dropped from the encoded form when it holds its zero value.
type Command struct {
// Type names the command kind (a CommandType value), encoded as "type".
Type CommandType `json:"type"`
// CallID is encoded as "call_id" and is emitted even when empty.
// Presumably it identifies the call the command targets — confirm against the caller.
CallID string `json:"call_id"`
// Text is the command's text payload, omitted when empty.
// Presumably the text to synthesize for tts.* commands — TODO confirm in HandleCommand.
Text string `json:"text,omitempty"`
// UtteranceID is an optional utterance identifier, encoded as "utterance_id".
UtteranceID string `json:"utterance_id,omitempty"`
// StreamEnd on tts.stream marks the final LLM chunk (same as tts.stream.end).
// Because of omitempty, false is never written to the wire.
StreamEnd bool `json:"stream_end,omitempty"`
// Reason is an optional reason string, omitted when empty.
// Presumably used by hangup/interrupt commands — TODO confirm in HandleCommand.
Reason string `json:"reason,omitempty"`
// Meta is optional turn-level metadata; a nil pointer is omitted from the JSON.
Meta *CommandMeta `json:"meta,omitempty"`
}
Command is the envelope the dialog app sends to the voice plane.
type CommandMeta ¶
// CommandMeta is the optional metadata object carried in Command.Meta.
// Every field is omitted from the encoded form when zero.
// NOTE(review): keys here are camelCase ("llmModel") while Command and Event
// use snake_case ("call_id"); changing either would break the wire format,
// so the mismatch is documented rather than altered.
type CommandMeta struct {
// LLMModel is encoded as "llmModel".
// Presumably the name of the model that produced the turn — confirm with the dialog app.
LLMModel string `json:"llmModel,omitempty"`
// LLMFirstMs is encoded as "llmFirstMs".
// Presumably LLM time-to-first-token in milliseconds — TODO confirm.
LLMFirstMs int `json:"llmFirstMs,omitempty"`
// LLMWallMs is encoded as "llmWallMs".
// Presumably total LLM wall-clock time in milliseconds — TODO confirm.
LLMWallMs int `json:"llmWallMs,omitempty"`
// UserText is encoded as "userText".
// Presumably the user utterance the turn responds to — TODO confirm.
UserText string `json:"userText,omitempty"`
}
CommandMeta carries optional turn-level metadata from the dialog app.
type CommandType ¶
// CommandType is the string-backed discriminator stored in Command.Type
// and encoded under the JSON key "type".
type CommandType string
CommandType enumerates messages the dialog app sends to the voice plane.
const ( CmdTTSSpeak CommandType = "tts.speak" CmdTTSStream CommandType = "tts.stream" // LLM streaming token/chunk CmdTTSStreamEnd CommandType = "tts.stream.end" // flush segmenter tail CmdTTSInterrupt CommandType = "tts.interrupt" CmdHangup CommandType = "hangup" )
type Config ¶
// Config carries the per-call wiring for a Session: call identity, the ASR
// and TTS backends, output/event callbacks, and optional tuning knobs.
// Optional booleans are *bool; presumably nil selects the documented default
// rather than false — confirm in NewSession.
type Config struct {
// CallID identifies the call.
// Presumably the same value reported as call_id in Command/Event — TODO confirm.
CallID string
// Meta is the StartMeta for the call (StartMeta is declared elsewhere in this package).
Meta StartMeta
// Engine is the ASR engine; presumably it consumes the uplink audio — confirm in NewSession.
Engine asr.Engine
// InputCodec names the uplink audio codec.
// Accepted values are not visible here — TODO confirm.
InputCodec string
// PCMSampleRate is the PCM sample rate; presumably in Hz — TODO confirm.
PCMSampleRate int
// TTSService is the TTS backend; presumably it synthesizes the downlink audio — confirm.
TTSService tts.TTSService
// TTSCache wraps TTSService with process-level PCM caching when set.
TTSCache *tts.CacheConfig
// OnAudioOut receives outgoing audio bytes and may return an error.
// Presumably called once per downlink frame — TODO confirm framing and error handling.
OnAudioOut func([]byte) error
// OnEvent receives voice-plane events; per the EventHandler contract it
// should return quickly.
OnEvent EventHandler
// OnHangup is called with a reason string.
// Presumably invoked when the call is torn down — TODO confirm when and how often.
OnHangup func(reason string)
// OnTurn receives TurnEvent values (TurnEvent is declared elsewhere in this package).
OnTurn func(TurnEvent)
// EnableVAD enables barge-in during downlink playback (default true).
EnableVAD *bool
// VADConfig overrides barge-in thresholds; nil uses DefaultBargeInVADConfig.
VADConfig *asr.VADConfig
// EnableEchoFilter suppresses uplink while TTS active (default true).
EnableEchoFilter *bool
// EchoTail extends echo suppression after playback ends (default 150ms).
EchoTail time.Duration
// Denoiser is an optional uplink noise suppressor / echo canceller
// (e.g. RNNoise, WebRTC AEC3, hardware AEC).
// When non-nil, runs after decode and before VAD.
Denoiser asr.Denoiser
// CoalesceUplink buffers small PCM chunks before ASR (default true).
CoalesceUplink *bool
// PaceRealtime paces TTS frames at wall-clock rate for smooth playback (default true).
PaceRealtime *bool
// TTSFrameDuration sets downlink frame size (default 60ms).
TTSFrameDuration time.Duration
// OutputCodec is the downlink wire codec ("pcm" default, "opus" uses AudioSender).
OutputCodec string
// EnableSentenceFilter enables ASR sentence-boundary filtering (default true).
EnableSentenceFilter *bool
// SentenceFilterSimilarity sets the dedup threshold: nil means 0.85, while an
// explicit 0 disables dedup only.
SentenceFilterSimilarity *float64
// SentenceFilter overrides auto-created filter when set explicitly.
SentenceFilter *asr.SentenceFilter
// EnableStreamSegmenter wires TextSegmenter for tts.stream commands (default true).
EnableStreamSegmenter *bool
// TextSegmenterConfig configures the stream segmenter.
// Presumably nil selects the segmenter's defaults — TODO confirm.
TextSegmenterConfig *tts.TextSegmenterConfig
}
Config wires the voice plane for a single call.
type Event ¶
// Event is the JSON envelope for a single voice-plane event. Only "type" and
// "call_id" are always encoded; every other field carries omitempty, so a
// false Fatal/End/OK (or a zero PCMHz) is indistinguishable on the wire from
// an absent field.
type Event struct {
// Type names the event kind (an EventType value), encoded as "type".
Type EventType `json:"type"`
// CallID is encoded as "call_id" and is emitted even when empty.
CallID string `json:"call_id"`
// From and To are optional endpoint strings.
// Presumably the calling and called parties on call.started — TODO confirm.
From string `json:"from,omitempty"`
To string `json:"to,omitempty"`
// Codec is an optional codec name.
// Presumably the negotiated uplink codec — TODO confirm.
Codec string `json:"codec,omitempty"`
// PCMHz is encoded as "pcm_hz"; presumably the PCM sample rate in Hz — TODO confirm.
PCMHz int `json:"pcm_hz,omitempty"`
// Reason is an optional reason string; presumably set on call.ended — TODO confirm.
Reason string `json:"reason,omitempty"`
// Text is an optional text payload; presumably the transcript on asr.* events — TODO confirm.
Text string `json:"text,omitempty"`
// Message is an optional message string; presumably the error description
// on asr.error — TODO confirm.
Message string `json:"message,omitempty"`
// Fatal is written only when true.
// Presumably marks an unrecoverable error — TODO confirm.
Fatal bool `json:"fatal,omitempty"`
// Digit is an optional DTMF digit string; presumably set on dtmf events
// (see PushDTMF's digit parameter) — TODO confirm.
Digit string `json:"digit,omitempty"`
// End is written only when true; presumably mirrors PushDTMF's end flag — TODO confirm.
End bool `json:"end,omitempty"`
// UtteranceID is an optional utterance identifier, encoded as "utterance_id".
UtteranceID string `json:"utterance_id,omitempty"`
// OK is written only when true; consumers must treat a missing "ok" as false.
OK bool `json:"ok,omitempty"`
// Target is an optional target string; presumably the transfer destination
// on transfer.request (see ForwardTransferRequest) — TODO confirm.
Target string `json:"target,omitempty"`
}
Event is the envelope the voice plane sends to the dialog app.
type EventHandler ¶
// EventHandler is the callback signature for voice-plane events: it takes
// one Event by value and returns nothing, so it has no way to report failure
// back to the session.
type EventHandler func(Event)
EventHandler receives voice-plane events. Implementations should return quickly; heavy work (LLM calls) belongs in a separate goroutine.
type EventType ¶
// EventType is the string-backed discriminator stored in Event.Type and
// encoded under the JSON key "type".
type EventType string
EventType enumerates messages the voice plane sends to the dialog app.
const ( EvCallStarted EventType = "call.started" EvCallEnded EventType = "call.ended" EvASRPartial EventType = "asr.partial" EvASRFinal EventType = "asr.final" EvASRError EventType = "asr.error" EvDTMF EventType = "dtmf" EvTTSStarted EventType = "tts.started" EvTTSEnded EventType = "tts.ended" EvTTSInterrupt EventType = "tts.interrupt" EvTransferRequest EventType = "transfer.request" )
type Session ¶
// Session exposes no exported fields; callers interact with it only through
// its methods and obtain one via NewSession.
type Session struct {
// contains filtered or unexported fields
}
Session is a transport-agnostic voice call session. It runs uplink ASR and downlink TTS, emitting events to an external dialog app and accepting commands.
func NewSession ¶
NewSession builds and returns a voice session from cfg.
func (*Session) ForwardTransferRequest ¶
func (s *Session) ForwardTransferRequest(target string)
ForwardTransferRequest notifies the dialog app of a transfer request.
func (*Session) HandleCommand ¶
func (s *Session) HandleCommand(cmd Command)
HandleCommand processes a dialog-plane command.
func (*Session) IsTTSPlaying ¶
func (s *Session) IsTTSPlaying() bool
IsTTSPlaying reports whether downlink TTS is active.
func (*Session) ProcessAudio ¶
func (s *Session) ProcessAudio(ctx context.Context, data []byte) error
ProcessAudio feeds one uplink audio chunk (encoded or PCM per session config).