Documentation
¶
Index ¶
Constants ¶
// Audio format parameters shared by the PCM/Opus send and receive paths.
const (
	// SampleRate is the audio sample rate in Hz required by Opus.
	SampleRate = 48000
	// Channels is the number of audio channels (mono).
	Channels = 1
	// FrameSize is the number of samples in a 20ms frame at 48kHz.
	FrameSize = 960
)
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type AgentState ¶
// AgentState is a string-typed label for the voice agent pipeline state
// reported by the server.
type AgentState string
AgentState represents the server-reported state of the voice agent pipeline.
const ( AgentListening AgentState = "listening" AgentThinking AgentState = "thinking" AgentSpeaking AgentState = "speaking" )
type Client ¶
// Client manages a WebRTC connection to a Voice Agent server and exposes the
// local and remote audio tracks for custom audio I/O.
type Client struct {
// LocalTrack is the outbound audio track you write RTP packets to.
// It is created during Connect() and available afterwards.
LocalTrack *webrtc.TrackLocalStaticRTP
// RemoteTrackCh delivers the remote track carrying inbound audio from the agent.
// The track is sent on this channel after the connection is established.
RemoteTrackCh chan *webrtc.TrackRemote
// contains filtered or unexported fields
}
Client manages a WebRTC connection to a Voice Agent server via WHIP signaling. It handles peer connection setup, data channel event handling, and provides access to the local/remote audio tracks for custom audio I/O.
func NewClient ¶
func NewClient(cfg Config, events EventHandler) *Client
NewClient creates a new voice agent client with the given configuration and event handlers.
func (*Client) Connect ¶
Connect establishes a WebRTC connection to the voice agent server using WHIP. It creates a local audio track (Opus), performs WHIP signaling, and sets up the data channel for receiving transcript/response events.
After Connect returns, write audio to LocalTrack and read agent audio from RemoteTrackCh.
func (*Client) Disconnect ¶
func (c *Client) Disconnect()
Disconnect tears down the WebRTC connection and frees resources.
func (*Client) RecvPCM ¶
RecvPCM blocks until a frame of audio is received from the agent, decodes the Opus payload, and writes PCM int16 samples into pcm. The pcm slice should have capacity for at least FrameSize (960) samples. Returns the number of decoded samples.
func (*Client) SendPCM ¶
SendPCM encodes a 20ms frame of PCM int16 audio (mono, 48kHz, 960 samples) and sends it as an RTP/Opus packet to the voice agent server.
func (*Client) Status ¶
func (c *Client) Status() ConnectionStatus
Status returns the current connection status.
func (*Client) Transcript ¶
func (c *Client) Transcript() []TranscriptEntry
Transcript returns the current conversation transcript.
type Config ¶
// Config holds the settings passed to NewClient: where to signal, how to
// authenticate, and which ICE servers to use.
type Config struct {
// WHIPEndpoint is the URL of the WHIP signaling endpoint.
// Defaults to "http://localhost:8080/whip".
WHIPEndpoint string
// Token is an optional JWT for authenticating with the WHIP endpoint.
// It is overridden by TokenURL when that field is set.
Token string
// TokenURL is the URL of a token endpoint. If set, the client will POST
// to this URL to fetch a JWT before each WHIP connection. Overrides Token.
TokenURL string
// APIKey is sent as a Bearer header when fetching a token from TokenURL.
APIKey string
// ICEServers configures the ICE servers for the WebRTC connection.
// Defaults to Google's public STUN server.
ICEServers []webrtc.ICEServer
// Metadata is an optional set of key-value pairs appended to the WHIP
// endpoint as query parameters (e.g. direction=outbound).
Metadata map[string]string
}
Config holds the configuration for a Client.
type ConnectionStatus ¶
// ConnectionStatus is a string-typed label for the current state of the
// voice agent connection.
type ConnectionStatus string
ConnectionStatus represents the current state of the voice agent connection.
const ( StatusIdle ConnectionStatus = "idle" StatusConnecting ConnectionStatus = "connecting" StatusConnected ConnectionStatus = "connected" StatusError ConnectionStatus = "error" StatusDisconnected ConnectionStatus = "disconnected" )
type DataChannelMessage ¶
// DataChannelMessage is the JSON shape of a message received on the data
// channel. Type identifies the kind of message; the remaining fields are
// optional and are only meaningful for the message types noted beside them.
type DataChannelMessage struct {
Type string `json:"type"` // "transcript", "response", "error", "timing", or "state"
// NOTE(review): Text and Final presumably carry the transcript/response text
// and whether it is finalized — confirm against the server protocol.
Text string `json:"text,omitempty"`
Final bool `json:"final,omitempty"`
Message string `json:"message,omitempty"` // for error type
Stage string `json:"stage,omitempty"` // for timing type
Ms int `json:"ms,omitempty"` // for timing type
State string `json:"state,omitempty"` // for state type
}
DataChannelMessage represents a message received on the data channel.
type EventHandler ¶
// EventHandler groups the callbacks a Client invokes for voice agent events.
// It is passed to NewClient alongside the Config.
type EventHandler struct {
// OnStatusChange is called when the connection status changes.
OnStatusChange func(status ConnectionStatus)
// OnTranscript is called when a new or updated transcript entry is received.
// NOTE(review): all is presumably the full transcript so far — confirm.
OnTranscript func(entry TranscriptEntry, all []TranscriptEntry)
// OnError is called when an error occurs.
OnError func(err error)
// OnTiming is called when a timing/latency event is received from the server.
OnTiming func(event TimingEvent)
// OnAgentStateChange is called when the server reports an agent state transition.
OnAgentStateChange func(state AgentState)
// OnDataChannelMessage is called for every raw data channel message.
// This is optional and useful for custom message handling.
OnDataChannelMessage func(msg DataChannelMessage)
}
EventHandler defines callbacks for voice agent events.
type TimingEvent ¶
TimingEvent represents a single latency measurement from the server pipeline.
type TranscriptEntry ¶
// TranscriptEntry is a single message in the conversation transcript, as
// returned by Client.Transcript and passed to EventHandler.OnTranscript.
type TranscriptEntry struct {
Role string `json:"role"` // "user" or "assistant"
Text string `json:"text"`
// NOTE(review): Partial presumably marks an entry that may still be updated — confirm.
Partial bool `json:"partial,omitempty"`
}
TranscriptEntry represents a single transcript message in the conversation.
type WHIPResult ¶
WHIPResult holds the response from a WHIP signaling exchange.