Documentation ¶
Overview ¶
Package client provides a typed HTTP client for talking to a remote SpeechKit Server (the `cmd/speechkit-server` Linux container or any compatible deployment).
Use it from any Go program — desktop, server, or test harness — that wants to consume Dictation, Assist, or Voice Agent endpoints over the network without embedding the kernel.
Auth ¶
The client supports the same bearer-token flow as the server: pass the token via [Options.Token] (or the equivalent env var documented for the server you are calling). For local trusted setups, the server may be configured without a token; the client tolerates that mode.
Timeouts and retries ¶
Per-request timeouts are configured via [Options.Timeout]. The client does not retry by default — host apps own the retry policy because the right behavior differs between idempotent reads (safe to retry) and non-idempotent writes (which may need a higher-level dedupe key).
Index ¶
- type AudioAsset
- type CatalogReadiness
- type Client
- func (c *Client) CatalogContracts(ctx context.Context) ([]framework.ModeContract, error)
- func (c *Client) CatalogProfiles(ctx context.Context, mode string) ([]framework.ProviderProfile, error)
- func (c *Client) CatalogReadiness(ctx context.Context) ([]CatalogReadiness, error)
- func (c *Client) Config(ctx context.Context) (map[string]any, error)
- func (c *Client) CreatePersona(ctx context.Context, payload any) (json.RawMessage, error)
- func (c *Client) CreateRole(ctx context.Context, payload any) (json.RawMessage, error)
- func (c *Client) CreateSequence(ctx context.Context, payload any) (json.RawMessage, error)
- func (c *Client) CreateVoiceAgentSession(ctx context.Context) (*VoiceAgentTicket, error)
- func (c *Client) DeletePersona(ctx context.Context, id string) error
- func (c *Client) DeleteRole(ctx context.Context, id string) error
- func (c *Client) DeleteSequence(ctx context.Context, id string) error
- func (c *Client) DeleteVoiceAgentSession(ctx context.Context, sessionID string) error
- func (c *Client) DialVoiceAgent(ctx context.Context, ticket *VoiceAgentTicket) (*VoiceAgentSession, error)
- func (c *Client) DoJSON(ctx context.Context, method, path string, body, out any) error
- func (c *Client) Persona(ctx context.Context, id string) (json.RawMessage, error)
- func (c *Client) Personas(ctx context.Context) (json.RawMessage, error)
- func (c *Client) PersonasList(ctx context.Context) (json.RawMessage, error)
- func (c *Client) ProviderReadiness(ctx context.Context, id string) (*CatalogReadiness, error)
- func (c *Client) RawJSON(ctx context.Context, method, path string, body any) (json.RawMessage, error)
- func (c *Client) ReplaceVocabularyEntries(ctx context.Context, language string, entries []DictionaryEntry) ([]DictionaryEntry, error)
- func (c *Client) Role(ctx context.Context, id string) (json.RawMessage, error)
- func (c *Client) Roles(ctx context.Context) (json.RawMessage, error)
- func (c *Client) Sequence(ctx context.Context, id string) (json.RawMessage, error)
- func (c *Client) Sequences(ctx context.Context) (json.RawMessage, error)
- func (c *Client) Status(ctx context.Context) (*Status, error)
- func (c *Client) TTSSynthesize(ctx context.Context, input TTSSynthesizeRequest) (*TTSSynthesizeResponse, error)
- func (c *Client) TTSVoices(ctx context.Context) ([]Voice, error)
- func (c *Client) TranscribeFile(ctx context.Context, path string, opts TranscribeOptions) (*TranscribeResponse, error)
- func (c *Client) Transcript(ctx context.Context, id int64) (*Transcript, error)
- func (c *Client) Transcripts(ctx context.Context, limit int) ([]Transcript, error)
- func (c *Client) UpdatePersona(ctx context.Context, id string, payload any) (json.RawMessage, error)
- func (c *Client) UpdateRole(ctx context.Context, id string, payload any) (json.RawMessage, error)
- func (c *Client) UpdateSequence(ctx context.Context, id string, payload any) (json.RawMessage, error)
- func (c *Client) VocabularyEntries(ctx context.Context, language string) ([]DictionaryEntry, error)
- func (c *Client) VoiceAgentSessionSummary(ctx context.Context, id int64) (*VoiceAgentSummary, error)
- func (c *Client) VoiceAgentSessionTranscript(ctx context.Context, id int64) (*VoiceAgentTranscript, error)
- type ConfigSummary
- type DictionaryEntry
- type HTTPError
- type Options
- type PersonaResource
- type RoleResource
- type SequenceResource
- type Status
- type TTSSynthesizeRequest
- type TTSSynthesizeResponse
- type TranscribeOptions
- type TranscribeResponse
- type Transcript
- type Voice
- type VoiceAgentFrame
- type VoiceAgentMessage
- type VoiceAgentSession
- func (s *VoiceAgentSession) AdvanceStep(ctx context.Context, reason string) error
- func (s *VoiceAgentSession) Close() error
- func (s *VoiceAgentSession) ReadMessage(ctx context.Context) (VoiceAgentMessage, error)
- func (s *VoiceAgentSession) SendAudio(ctx context.Context, chunk []byte) error
- func (s *VoiceAgentSession) SendAudioEnd(ctx context.Context) error
- func (s *VoiceAgentSession) SendStart(ctx context.Context, frame VoiceAgentStartFrame) error
- func (s *VoiceAgentSession) SendStop(ctx context.Context) error
- func (s *VoiceAgentSession) SendText(ctx context.Context, text string) error
- func (s *VoiceAgentSession) SessionID() string
- type VoiceAgentStartFrame
- type VoiceAgentSummary
- type VoiceAgentTicket
- type VoiceAgentTranscript
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type AudioAsset ¶
type CatalogReadiness ¶
type Client ¶
// Client is the typed HTTP client for a remote SpeechKit Server. All of its
// state is unexported; callers interact with it only through its methods.
// NOTE(review): this page lists no exported constructor function — confirm
// how a Client is meant to be obtained (see Options).
type Client struct {
// contains filtered or unexported fields
}
func (*Client) CatalogContracts ¶
func (*Client) CatalogProfiles ¶
func (*Client) CatalogReadiness ¶
func (c *Client) CatalogReadiness(ctx context.Context) ([]CatalogReadiness, error)
func (*Client) CreatePersona ¶
func (*Client) CreateRole ¶
func (*Client) CreateSequence ¶
func (*Client) CreateVoiceAgentSession ¶ added in v0.34.1
func (c *Client) CreateVoiceAgentSession(ctx context.Context) (*VoiceAgentTicket, error)
CreateVoiceAgentSession mints a session + ticket. Pair the result with DialVoiceAgent to upgrade to the WebSocket.
func (*Client) DeleteSequence ¶
func (*Client) DeleteVoiceAgentSession ¶ added in v0.34.1
DeleteVoiceAgentSession force-closes a session. Idempotent: 404 is treated as success because the server already removed it.
func (*Client) DialVoiceAgent ¶ added in v0.34.1
func (c *Client) DialVoiceAgent(ctx context.Context, ticket *VoiceAgentTicket) (*VoiceAgentSession, error)
DialVoiceAgent upgrades the ticket to a WebSocket. The caller still needs to send a start frame (see VoiceAgentSession.SendStart) before any other I/O.
func (*Client) PersonasList ¶
func (*Client) ProviderReadiness ¶
func (*Client) ReplaceVocabularyEntries ¶
func (c *Client) ReplaceVocabularyEntries(ctx context.Context, language string, entries []DictionaryEntry) ([]DictionaryEntry, error)
func (*Client) TTSSynthesize ¶
func (c *Client) TTSSynthesize(ctx context.Context, input TTSSynthesizeRequest) (*TTSSynthesizeResponse, error)
func (*Client) TranscribeFile ¶
func (c *Client) TranscribeFile(ctx context.Context, path string, opts TranscribeOptions) (*TranscribeResponse, error)
func (*Client) Transcript ¶
func (*Client) Transcripts ¶
func (*Client) UpdatePersona ¶
func (*Client) UpdateRole ¶
func (*Client) UpdateSequence ¶
func (*Client) VocabularyEntries ¶
func (*Client) VoiceAgentSessionSummary ¶
func (*Client) VoiceAgentSessionTranscript ¶
type ConfigSummary ¶
type DictionaryEntry ¶
// DictionaryEntry is the JSON shape of one vocabulary entry, as accepted by
// Client.ReplaceVocabularyEntries and returned by Client.VocabularyEntries.
// JSON keys are camelCase.
type DictionaryEntry struct {
// ID is omitted from the JSON when zero.
ID int64 `json:"id,omitempty"`
// Spoken and Canonical are always serialized, even when empty.
// NOTE(review): presumably Spoken is the form as heard and Canonical the
// spelling to substitute for it — confirm against the server.
Spoken string `json:"spoken"`
Canonical string `json:"canonical"`
Language string `json:"language"`
Source string `json:"source,omitempty"`
// Enabled has no omitempty, so false is sent explicitly.
Enabled bool `json:"enabled"`
UsageCount int `json:"usageCount,omitempty"`
// NOTE(review): omitempty has no effect on struct values such as time.Time
// in encoding/json, so a zero CreatedAt/UpdatedAt is still marshaled
// (as "0001-01-01T00:00:00Z"). If omission is intended, this needs
// omitzero (Go 1.24+) or a *time.Time.
CreatedAt time.Time `json:"createdAt,omitempty"`
UpdatedAt time.Time `json:"updatedAt,omitempty"`
}
type PersonaResource ¶
type RoleResource ¶
type SequenceResource ¶
type TTSSynthesizeRequest ¶
type TTSSynthesizeResponse ¶
type TranscribeOptions ¶
type TranscribeResponse ¶
// TranscribeResponse is the result returned by Client.TranscribeFile.
//
// Unlike Transcript, this type uses snake_case JSON keys (duration_ms,
// latency_ms); the two shapes are not interchangeable on the wire.
type TranscribeResponse struct {
// Text is always serialized, even when empty.
Text string `json:"text"`
Language string `json:"language,omitempty"`
// NOTE(review): the Ms suffix suggests milliseconds — confirm the unit
// against the server.
DurationMs int64 `json:"duration_ms"`
LatencyMs int64 `json:"latency_ms"`
Provider string `json:"provider,omitempty"`
Model string `json:"model,omitempty"`
// Confidence is omitted when zero, so an absent value and an explicit 0
// cannot be told apart after decoding.
Confidence float64 `json:"confidence,omitempty"`
}
type Transcript ¶
// Transcript is a transcript record as returned by Client.Transcript
// (single record by id) and Client.Transcripts (list).
//
// JSON keys are camelCase (durationMs, latencyMs), in contrast to the
// snake_case keys used by TranscribeResponse.
type Transcript struct {
ID int64 `json:"id"`
Text string `json:"text"`
Language string `json:"language"`
Provider string `json:"provider"`
Model string `json:"model"`
// NOTE(review): the Ms suffix suggests milliseconds — confirm the unit
// against the server.
DurationMs int64 `json:"durationMs"`
LatencyMs int64 `json:"latencyMs"`
// AudioPath and Audio are optional; Audio is nil when the key is absent.
AudioPath string `json:"audioPath,omitempty"`
Audio *AudioAsset `json:"audio,omitempty"`
// CreatedAt has no omitempty and is always serialized.
CreatedAt time.Time `json:"createdAt"`
// Owner* fields are optional and omitted when empty.
OwnerUserID string `json:"ownerUserId,omitempty"`
OwnerOrgID string `json:"ownerOrgId,omitempty"`
OwnerSource string `json:"ownerSource,omitempty"`
}
type VoiceAgentFrame ¶ added in v0.34.1
// VoiceAgentFrame is the decoded form of an inbound text (JSON) frame on a
// voice agent WebSocket. It is a flat union: Type is the only key that is
// always present, and every other field is optional. JSON keys are
// snake_case.
// NOTE(review): which optional fields accompany which Type value is not
// visible here — confirm against the server's frame definitions.
type VoiceAgentFrame struct {
// Type is the only field without omitempty.
Type string `json:"type"`
State string `json:"state,omitempty"`
Text string `json:"text,omitempty"`
// Done decodes to false both when the key is absent and when it is
// explicitly false.
Done bool `json:"done,omitempty"`
ID string `json:"id,omitempty"`
Name string `json:"name,omitempty"`
Code string `json:"code,omitempty"`
Message string `json:"message,omitempty"`
Reason string `json:"reason,omitempty"`
SequenceID string `json:"sequence_id,omitempty"`
StepID string `json:"step_id,omitempty"`
// StepIndex decodes to 0 when the key is absent, so index 0 cannot be
// distinguished from "not provided".
StepIndex int `json:"step_index,omitempty"`
Status string `json:"status,omitempty"`
}
VoiceAgentFrame is the parsed shape of any inbound text frame. Binary frames carry audio and are returned via VoiceAgentMessage.Audio instead.
type VoiceAgentMessage ¶ added in v0.34.1
// VoiceAgentMessage is one inbound event from VoiceAgentSession.ReadMessage.
// It has no JSON tags: it is an in-process envelope, not a wire type.
type VoiceAgentMessage struct {
// Audio holds the payload of a binary frame.
Audio []byte
// Frame holds the decoded payload of a text frame.
Frame *VoiceAgentFrame
}
VoiceAgentMessage is a single inbound event. Exactly one of Audio or Frame is set per call to ReadMessage; audio chunks are raw PCM S16LE at 24 kHz mono (Gemini Live native output rate).
type VoiceAgentSession ¶ added in v0.34.1
// VoiceAgentSession is a WebSocket session handle returned by
// Client.DialVoiceAgent. All of its state is unexported; use its methods
// (SendStart, SendText, SendAudio, ReadMessage, Close, ...) to drive it.
type VoiceAgentSession struct {
// contains filtered or unexported fields
}
VoiceAgentSession is a duplex WebSocket session. Use SendStart first, then SendText / SendAudio / SendAudioEnd to drive the conversation, and ReadMessage in a loop to consume server frames until SessionEnd arrives.
The zero value is unusable — always go through DialVoiceAgent.
func (*VoiceAgentSession) AdvanceStep ¶ added in v0.34.1
func (s *VoiceAgentSession) AdvanceStep(ctx context.Context, reason string) error
AdvanceStep advances the active sequence step. Reason is optional and surfaces in the resulting sequence_step frame.
func (*VoiceAgentSession) Close ¶ added in v0.34.1
func (s *VoiceAgentSession) Close() error
Close releases the WebSocket. Safe to call multiple times.
func (*VoiceAgentSession) ReadMessage ¶ added in v0.34.1
func (s *VoiceAgentSession) ReadMessage(ctx context.Context) (VoiceAgentMessage, error)
ReadMessage blocks until the next inbound WebSocket message arrives. Binary frames are returned as Audio; text frames are decoded into Frame. Returns io.EOF or a websocket close error when the peer goes away.
func (*VoiceAgentSession) SendAudio ¶ added in v0.34.1
func (s *VoiceAgentSession) SendAudio(ctx context.Context, chunk []byte) error
SendAudio forwards a PCM chunk (16 kHz, signed 16-bit little-endian, mono).
func (*VoiceAgentSession) SendAudioEnd ¶ added in v0.34.1
func (s *VoiceAgentSession) SendAudioEnd(ctx context.Context) error
SendAudioEnd marks the end of the current microphone turn. Only needed when automatic activity detection is disabled.
func (*VoiceAgentSession) SendStart ¶ added in v0.34.1
func (s *VoiceAgentSession) SendStart(ctx context.Context, frame VoiceAgentStartFrame) error
SendStart sends the mandatory first control frame. MediaTransport defaults to "websocket" when empty.
func (*VoiceAgentSession) SendStop ¶ added in v0.34.1
func (s *VoiceAgentSession) SendStop(ctx context.Context) error
SendStop asks the server to gracefully end the session.
func (*VoiceAgentSession) SendText ¶ added in v0.34.1
func (s *VoiceAgentSession) SendText(ctx context.Context, text string) error
SendText injects a text turn (the agent will reply via audio + transcript).
func (*VoiceAgentSession) SessionID ¶ added in v0.34.1
func (s *VoiceAgentSession) SessionID() string
SessionID returns the manager-assigned session identifier.
type VoiceAgentStartFrame ¶ added in v0.34.1
// VoiceAgentStartFrame is the payload passed to VoiceAgentSession.SendStart.
// Every field is tagged omitempty, so an entirely zero frame carries none of
// these keys. JSON keys are snake_case.
type VoiceAgentStartFrame struct {
// PersonaID, RoleID and SequenceID select what the session is bound to.
PersonaID string `json:"persona_id,omitempty"`
RoleID string `json:"role_id,omitempty"`
SequenceID string `json:"sequence_id,omitempty"`
MediaTransport string `json:"media_transport,omitempty"` // "websocket" (default) or "livekit"
Voice string `json:"voice,omitempty"`
Locale string `json:"locale,omitempty"`
Model string `json:"model,omitempty"`
// NOTE(review): accepted values for Thinking are not visible here —
// confirm against the server.
Thinking string `json:"thinking,omitempty"`
SystemPromptOverride string `json:"system_prompt_override,omitempty"`
}
VoiceAgentStartFrame opens a session and binds it to a persona/role/sequence. PersonaID is required if the deployment has any personas; the server's resolver falls back to the configured default persona when empty.
type VoiceAgentSummary ¶
type VoiceAgentTicket ¶ added in v0.34.1
// VoiceAgentTicket is the session envelope returned by
// Client.CreateVoiceAgentSession and consumed by Client.DialVoiceAgent.
// JSON keys are snake_case.
type VoiceAgentTicket struct {
SessionID string `json:"session_id"`
WSURL string `json:"ws_url"`
// WSSubprotocol and LegacyWSURL are optional and omitted when empty.
WSSubprotocol string `json:"ws_subprotocol,omitempty"`
LegacyWSURL string `json:"legacy_ws_url,omitempty"`
Ticket string `json:"ticket"`
// ExpiresAt is always serialized (no omitempty).
// NOTE(review): presumably the ticket must be dialed before this time —
// confirm the server's enforcement.
ExpiresAt time.Time `json:"expires_at"`
}
VoiceAgentTicket is the minted session envelope returned by POST /v1/voiceagent/sessions. DialVoiceAgent prefers WSSubprotocol so the one-time ticket does not have to ride in the URL.