client

package
v0.40.7 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: May 28, 2026 License: Apache-2.0 Imports: 15 Imported by: 0

Documentation

Overview

Package client provides a typed HTTP client for talking to a remote SpeechKit Server (the `cmd/speechkit-server` Linux container or any compatible deployment).

Use it from any Go program — desktop, server, or test harness — that wants to consume Dictation, Assist, or Voice Agent endpoints over the network without embedding the kernel.

Auth

The client supports the same bearer-token flow as the server: pass the token via [Options.Token] (or the equivalent env var documented for the server you are calling). For local trusted setups, the server may be configured without a token; the client tolerates that mode.

Timeouts and retries

Per-request timeouts are configured via [Options.Timeout]. The client does not retry by default — host apps own the retry policy because the right behavior differs between idempotent reads (safe to retry) and non-idempotent writes (which may need a higher-level dedupe key).

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type AudioAsset

// AudioAsset is the value type of Transcript.Audio.
//
// Path is the only field dropped from the JSON encoding when empty; the
// other fields are always encoded.
//
// NOTE(review): the set of valid StorageKind values is not visible in this
// listing, and the units of SizeBytes / DurationMs are presumably bytes and
// milliseconds as the names suggest — confirm against the server.
type AudioAsset struct {
	StorageKind string `json:"storageKind"`
	Path        string `json:"path,omitempty"`
	MimeType    string `json:"mimeType"`
	SizeBytes   int64  `json:"sizeBytes"`
	DurationMs  int64  `json:"durationMs"`
}

type CatalogReadiness

// CatalogReadiness is an alias (not a distinct type) for framework.Readiness.
// It is the element type returned by Client.CatalogReadiness and the pointee
// type returned by Client.ProviderReadiness.
type CatalogReadiness = framework.Readiness

type Client

// Client is the typed HTTP client for a remote SpeechKit Server. Its fields
// are unexported; obtain a value through New or FromEnv.
type Client struct {
	// contains filtered or unexported fields
}

func FromEnv

func FromEnv() (*Client, error)

func New

func New(opts Options) (*Client, error)

func (*Client) CatalogContracts

func (c *Client) CatalogContracts(ctx context.Context) ([]framework.ModeContract, error)

func (*Client) CatalogProfiles

func (c *Client) CatalogProfiles(ctx context.Context, mode string) ([]framework.ProviderProfile, error)

func (*Client) CatalogReadiness

func (c *Client) CatalogReadiness(ctx context.Context) ([]CatalogReadiness, error)

func (*Client) Config

func (c *Client) Config(ctx context.Context) (map[string]any, error)

func (*Client) CreatePersona

func (c *Client) CreatePersona(ctx context.Context, payload any) (json.RawMessage, error)

func (*Client) CreateRole

func (c *Client) CreateRole(ctx context.Context, payload any) (json.RawMessage, error)

func (*Client) CreateSequence

func (c *Client) CreateSequence(ctx context.Context, payload any) (json.RawMessage, error)

func (*Client) CreateVoiceAgentSession added in v0.34.1

func (c *Client) CreateVoiceAgentSession(ctx context.Context) (*VoiceAgentTicket, error)

CreateVoiceAgentSession mints a session + ticket. Pair the result with DialVoiceAgent to upgrade to the WebSocket.

func (*Client) DeletePersona

func (c *Client) DeletePersona(ctx context.Context, id string) error

func (*Client) DeleteRole

func (c *Client) DeleteRole(ctx context.Context, id string) error

func (*Client) DeleteSequence

func (c *Client) DeleteSequence(ctx context.Context, id string) error

func (*Client) DeleteVoiceAgentSession added in v0.34.1

func (c *Client) DeleteVoiceAgentSession(ctx context.Context, sessionID string) error

DeleteVoiceAgentSession force-closes a session. Idempotent: a 404 response is treated as success because it means the session has already been removed on the server.

func (*Client) DialVoiceAgent added in v0.34.1

func (c *Client) DialVoiceAgent(ctx context.Context, ticket *VoiceAgentTicket) (*VoiceAgentSession, error)

DialVoiceAgent upgrades the ticket to a WebSocket. The caller still needs to send a start frame (see VoiceAgentSession.SendStart) before any other I/O.

func (*Client) DoJSON

func (c *Client) DoJSON(ctx context.Context, method, path string, body, out any) error

func (*Client) Persona

func (c *Client) Persona(ctx context.Context, id string) (json.RawMessage, error)

func (*Client) Personas

func (c *Client) Personas(ctx context.Context) (json.RawMessage, error)

func (*Client) PersonasList

func (c *Client) PersonasList(ctx context.Context) (json.RawMessage, error)

func (*Client) ProviderReadiness

func (c *Client) ProviderReadiness(ctx context.Context, id string) (*CatalogReadiness, error)

func (*Client) RawJSON

func (c *Client) RawJSON(ctx context.Context, method, path string, body any) (json.RawMessage, error)

func (*Client) ReplaceVocabularyEntries

func (c *Client) ReplaceVocabularyEntries(ctx context.Context, language string, entries []DictionaryEntry) ([]DictionaryEntry, error)

func (*Client) Role

func (c *Client) Role(ctx context.Context, id string) (json.RawMessage, error)

func (*Client) Roles

func (c *Client) Roles(ctx context.Context) (json.RawMessage, error)

func (*Client) Sequence

func (c *Client) Sequence(ctx context.Context, id string) (json.RawMessage, error)

func (*Client) Sequences

func (c *Client) Sequences(ctx context.Context) (json.RawMessage, error)

func (*Client) Status

func (c *Client) Status(ctx context.Context) (*Status, error)

func (*Client) TTSSynthesize

func (c *Client) TTSSynthesize(ctx context.Context, input TTSSynthesizeRequest) (*TTSSynthesizeResponse, error)

func (*Client) TTSVoices

func (c *Client) TTSVoices(ctx context.Context) ([]Voice, error)

func (*Client) TranscribeFile

func (c *Client) TranscribeFile(ctx context.Context, path string, opts TranscribeOptions) (*TranscribeResponse, error)

func (*Client) Transcript

func (c *Client) Transcript(ctx context.Context, id int64) (*Transcript, error)

func (*Client) Transcripts

func (c *Client) Transcripts(ctx context.Context, limit int) ([]Transcript, error)

func (*Client) UpdatePersona

func (c *Client) UpdatePersona(ctx context.Context, id string, payload any) (json.RawMessage, error)

func (*Client) UpdateRole

func (c *Client) UpdateRole(ctx context.Context, id string, payload any) (json.RawMessage, error)

func (*Client) UpdateSequence

func (c *Client) UpdateSequence(ctx context.Context, id string, payload any) (json.RawMessage, error)

func (*Client) VocabularyEntries

func (c *Client) VocabularyEntries(ctx context.Context, language string) ([]DictionaryEntry, error)

func (*Client) VoiceAgentSessionSummary

func (c *Client) VoiceAgentSessionSummary(ctx context.Context, id int64) (*VoiceAgentSummary, error)

func (*Client) VoiceAgentSessionTranscript

func (c *Client) VoiceAgentSessionTranscript(ctx context.Context, id int64) (*VoiceAgentTranscript, error)

type ConfigSummary

// ConfigSummary is a string-keyed map of arbitrary values.
//
// NOTE(review): Client.Config is declared to return map[string]any rather
// than ConfigSummary, and no method in this listing uses this type — confirm
// whether it is still needed.
type ConfigSummary map[string]any

type DictionaryEntry

// DictionaryEntry is the element type exchanged by Client.VocabularyEntries
// and Client.ReplaceVocabularyEntries.
//
// ID, Source and UsageCount are omitted from the JSON encoding when zero or
// empty. Enabled has no omitempty, so false is always sent explicitly.
//
// NOTE(review): with the standard encoding/json package, omitempty has no
// effect on struct values, so a zero time.Time in CreatedAt / UpdatedAt is
// still encoded (as "0001-01-01T00:00:00Z") rather than omitted. Confirm the
// server tolerates that on writes.
type DictionaryEntry struct {
	ID         int64     `json:"id,omitempty"`
	Spoken     string    `json:"spoken"`
	Canonical  string    `json:"canonical"`
	Language   string    `json:"language"`
	Source     string    `json:"source,omitempty"`
	Enabled    bool      `json:"enabled"`
	UsageCount int       `json:"usageCount,omitempty"`
	CreatedAt  time.Time `json:"createdAt,omitempty"`
	UpdatedAt  time.Time `json:"updatedAt,omitempty"`
}

type HTTPError

// HTTPError carries an HTTP status code together with a body string.
//
// Error is declared on the value receiver, so both HTTPError and *HTTPError
// satisfy the error interface; callers using errors.As should match whichever
// form the client actually returns.
//
// NOTE(review): presumably returned for non-success responses with Body
// holding the response payload — confirm against the implementation.
type HTTPError struct {
	StatusCode int
	Body       string
}

func (HTTPError) Error

func (e HTTPError) Error() string

type Options

// Options is the configuration accepted by New.
//
// Token is the bearer token sent to the server; it may be left empty for
// servers configured without a token. Timeout sets the per-request timeout.
// The client does not retry by default, so retry policy belongs to the caller.
//
// NOTE(review): the zero-value behaviour of BaseURL, HTTPClient and UserAgent
// (defaults, validation) is not visible in this listing — confirm before
// relying on it.
type Options struct {
	BaseURL    string
	Token      string
	HTTPClient *http.Client
	UserAgent  string
	Timeout    time.Duration
}

type PersonaResource

// PersonaResource is a string-keyed map of arbitrary values.
//
// NOTE(review): the persona methods on Client in this listing return
// json.RawMessage, not PersonaResource — presumably callers unmarshal into
// this type themselves; confirm.
type PersonaResource map[string]any

type RoleResource

// RoleResource is a string-keyed map of arbitrary values.
//
// NOTE(review): the role methods on Client in this listing return
// json.RawMessage, not RoleResource — presumably callers unmarshal into this
// type themselves; confirm.
type RoleResource map[string]any

type SequenceResource

// SequenceResource is a string-keyed map of arbitrary values.
//
// NOTE(review): the sequence methods on Client in this listing return
// json.RawMessage, not SequenceResource — presumably callers unmarshal into
// this type themselves; confirm.
type SequenceResource map[string]any

type Status

// Status is the result type of Client.Status. All four fields are always
// present in the JSON encoding (none uses omitempty), and the uptime key is
// snake_case ("uptime_seconds").
//
// NOTE(review): the keys and value shapes inside Components are not visible
// in this listing.
type Status struct {
	Status        string         `json:"status"`
	Components    map[string]any `json:"components"`
	UptimeSeconds int64          `json:"uptime_seconds"`
	Version       string         `json:"version"`
}

type TTSSynthesizeRequest

// TTSSynthesizeRequest is the input to Client.TTSSynthesize.
//
// Text is always encoded. Locale, Voice, Speed and Format are omitted from
// the JSON body when left at their zero value, so a Speed of 0 is never sent.
//
// NOTE(review): accepted Format values and the server-side defaults applied
// to omitted fields are not visible in this listing.
type TTSSynthesizeRequest struct {
	Text   string  `json:"text"`
	Locale string  `json:"locale,omitempty"`
	Voice  string  `json:"voice,omitempty"`
	Speed  float64 `json:"speed,omitempty"`
	Format string  `json:"format,omitempty"`
}

type TTSSynthesizeResponse

// TTSSynthesizeResponse is the result type of Client.TTSSynthesize. JSON keys
// are snake_case.
//
// NOTE(review): AudioBase64 presumably holds the synthesized audio,
// base64-encoded, in the container named by Format — confirm the base64
// variant (standard vs URL-safe) before decoding.
type TTSSynthesizeResponse struct {
	AudioBase64 string `json:"audio_base64"`
	Format      string `json:"format"`
	SampleRate  int    `json:"sample_rate,omitempty"`
	DurationMs  int64  `json:"duration_ms,omitempty"`
	Provider    string `json:"provider,omitempty"`
	Voice       string `json:"voice,omitempty"`
}

type TranscribeOptions

// TranscribeOptions is the options argument of Client.TranscribeFile.
//
// The fields carry no JSON tags.
// NOTE(review): how they are transmitted (form fields, query parameters, ...)
// and what an empty value means are not visible in this listing.
type TranscribeOptions struct {
	Language string
	Model    string
	Prompt   string
}

type TranscribeResponse

// TranscribeResponse is the result type of Client.TranscribeFile.
//
// JSON keys here are snake_case ("duration_ms", "latency_ms"), whereas the
// Transcript type uses camelCase ("durationMs", "latencyMs") for the
// similarly named fields; the two are not interchangeable on the wire.
type TranscribeResponse struct {
	Text       string  `json:"text"`
	Language   string  `json:"language,omitempty"`
	DurationMs int64   `json:"duration_ms"`
	LatencyMs  int64   `json:"latency_ms"`
	Provider   string  `json:"provider,omitempty"`
	Model      string  `json:"model,omitempty"`
	Confidence float64 `json:"confidence,omitempty"`
}

type Transcript

// Transcript is the record type returned by Client.Transcript (single, by
// int64 ID) and Client.Transcripts (list).
//
// JSON keys are camelCase. Audio is a pointer and is omitted when nil;
// AudioPath and the three Owner* fields are omitted when empty.
//
// NOTE(review): both AudioPath and Audio.Path exist — presumably AudioPath is
// an older form kept for compatibility; confirm which one callers should read.
type Transcript struct {
	ID          int64       `json:"id"`
	Text        string      `json:"text"`
	Language    string      `json:"language"`
	Provider    string      `json:"provider"`
	Model       string      `json:"model"`
	DurationMs  int64       `json:"durationMs"`
	LatencyMs   int64       `json:"latencyMs"`
	AudioPath   string      `json:"audioPath,omitempty"`
	Audio       *AudioAsset `json:"audio,omitempty"`
	CreatedAt   time.Time   `json:"createdAt"`
	OwnerUserID string      `json:"ownerUserId,omitempty"`
	OwnerOrgID  string      `json:"ownerOrgId,omitempty"`
	OwnerSource string      `json:"ownerSource,omitempty"`
}

type Voice

// Voice is the element type returned by Client.TTSVoices.
//
// Discovery is the only field omitted from the JSON encoding when empty.
// NOTE(review): the meaning of Discovery and the scope of Default (per
// provider, per locale, or global) are not visible in this listing.
type Voice struct {
	Provider  string `json:"provider"`
	ID        string `json:"id"`
	Locale    string `json:"locale"`
	Default   bool   `json:"default"`
	Discovery string `json:"discovery,omitempty"`
}

type VoiceAgentFrame added in v0.34.1

// VoiceAgentFrame is the decoded form of an inbound text frame; it is
// delivered to callers through VoiceAgentMessage.Frame.
//
// Type is the only field without omitempty. Every other field is optional, so
// after decoding an absent field is indistinguishable from one sent with its
// zero value (for example Done == false or StepIndex == 0).
//
// NOTE(review): which fields accompany which Type value is not visible in
// this listing.
type VoiceAgentFrame struct {
	Type       string `json:"type"`
	State      string `json:"state,omitempty"`
	Text       string `json:"text,omitempty"`
	Done       bool   `json:"done,omitempty"`
	ID         string `json:"id,omitempty"`
	Name       string `json:"name,omitempty"`
	Code       string `json:"code,omitempty"`
	Message    string `json:"message,omitempty"`
	Reason     string `json:"reason,omitempty"`
	SequenceID string `json:"sequence_id,omitempty"`
	StepID     string `json:"step_id,omitempty"`
	StepIndex  int    `json:"step_index,omitempty"`
	Status     string `json:"status,omitempty"`
}

VoiceAgentFrame is the parsed shape of any inbound text frame. Binary frames carry audio and are returned via VoiceAgentMessage.Audio instead.

type VoiceAgentMessage added in v0.34.1

// VoiceAgentMessage is the value returned by VoiceAgentSession.ReadMessage.
// Exactly one of the two fields is set per call.
type VoiceAgentMessage struct {
	// Audio holds the payload of a binary WebSocket frame.
	Audio []byte
	// Frame holds the decoded form of a text WebSocket frame.
	Frame *VoiceAgentFrame
}

VoiceAgentMessage is a single inbound event. Exactly one of Audio or Frame is set per call to ReadMessage; audio chunks are raw PCM S16LE at 24 kHz mono (Gemini Live native output rate).

type VoiceAgentSession added in v0.34.1

// VoiceAgentSession is the duplex WebSocket session handle returned by
// Client.DialVoiceAgent. Its fields are unexported and the zero value is
// unusable, so never construct one directly.
type VoiceAgentSession struct {
	// contains filtered or unexported fields
}

VoiceAgentSession is a duplex WebSocket session. Use SendStart first, then SendText / SendAudio / SendAudioEnd to drive the conversation, and ReadMessage in a loop to consume server frames until SessionEnd arrives.

The zero value is unusable — always go through DialVoiceAgent.

func (*VoiceAgentSession) AdvanceStep added in v0.34.1

func (s *VoiceAgentSession) AdvanceStep(ctx context.Context, reason string) error

AdvanceStep advances the active sequence step. Reason is optional and surfaces in the resulting sequence_step frame.

func (*VoiceAgentSession) Close added in v0.34.1

func (s *VoiceAgentSession) Close() error

Close releases the WebSocket. Safe to call multiple times.

func (*VoiceAgentSession) ReadMessage added in v0.34.1

func (s *VoiceAgentSession) ReadMessage(ctx context.Context) (VoiceAgentMessage, error)

ReadMessage blocks until the next inbound WebSocket message arrives. Binary frames are returned as Audio; text frames are decoded into Frame. Returns io.EOF or a websocket close error when the peer goes away.

func (*VoiceAgentSession) SendAudio added in v0.34.1

func (s *VoiceAgentSession) SendAudio(ctx context.Context, chunk []byte) error

SendAudio forwards a PCM chunk (16 kHz, signed 16-bit little-endian, mono).

func (*VoiceAgentSession) SendAudioEnd added in v0.34.1

func (s *VoiceAgentSession) SendAudioEnd(ctx context.Context) error

SendAudioEnd marks the end of the current microphone turn. Only needed when automatic activity detection is disabled.

func (*VoiceAgentSession) SendStart added in v0.34.1

func (s *VoiceAgentSession) SendStart(ctx context.Context, frame VoiceAgentStartFrame) error

SendStart sends the mandatory first control frame. MediaTransport defaults to "websocket" when empty.

func (*VoiceAgentSession) SendStop added in v0.34.1

func (s *VoiceAgentSession) SendStop(ctx context.Context) error

SendStop asks the server to gracefully end the session.

func (*VoiceAgentSession) SendText added in v0.34.1

func (s *VoiceAgentSession) SendText(ctx context.Context, text string) error

SendText injects a text turn (the agent will reply via audio + transcript).

func (*VoiceAgentSession) SessionID added in v0.34.1

func (s *VoiceAgentSession) SessionID() string

SessionID returns the manager-assigned session identifier.

type VoiceAgentStartFrame added in v0.34.1

// VoiceAgentStartFrame is the argument to VoiceAgentSession.SendStart, the
// mandatory first control frame of a session.
//
// Every field is tagged omitempty, so an all-zero frame encodes no fields.
// SendStart substitutes "websocket" for an empty MediaTransport.
//
// NOTE(review): accepted values for Thinking and the precedence between
// SystemPromptOverride and the persona's own prompt are not visible in this
// listing.
type VoiceAgentStartFrame struct {
	PersonaID            string `json:"persona_id,omitempty"`
	RoleID               string `json:"role_id,omitempty"`
	SequenceID           string `json:"sequence_id,omitempty"`
	MediaTransport       string `json:"media_transport,omitempty"` // "websocket" (default) or "livekit"
	Voice                string `json:"voice,omitempty"`
	Locale               string `json:"locale,omitempty"`
	Model                string `json:"model,omitempty"`
	Thinking             string `json:"thinking,omitempty"`
	SystemPromptOverride string `json:"system_prompt_override,omitempty"`
}

VoiceAgentStartFrame opens a session and binds it to a persona/role/sequence. Set PersonaID to select a specific persona when the deployment defines any; when PersonaID is empty, the server's resolver falls back to the configured default persona.

type VoiceAgentSummary

// VoiceAgentSummary is the result type of Client.VoiceAgentSessionSummary.
// JSON keys are snake_case.
//
// NOTE(review): ID is an int64 while VoiceAgentTicket.SessionID is a string —
// presumably this is a stored-record ID rather than the live session ID;
// confirm. The keys inside Summary are not visible in this listing.
type VoiceAgentSummary struct {
	ID        int64          `json:"id"`
	Summary   map[string]any `json:"summary"`
	Language  string         `json:"language"`
	CreatedAt time.Time      `json:"created_at"`
}

type VoiceAgentTicket added in v0.34.1

// VoiceAgentTicket is returned by Client.CreateVoiceAgentSession and consumed
// by Client.DialVoiceAgent.
//
// WSSubprotocol and LegacyWSURL are optional on the wire. DialVoiceAgent
// prefers WSSubprotocol so that the one-time Ticket does not have to be
// carried in the URL.
//
// NOTE(review): whether DialVoiceAgent checks ExpiresAt locally or leaves
// expiry enforcement to the server is not visible in this listing.
type VoiceAgentTicket struct {
	SessionID     string    `json:"session_id"`
	WSURL         string    `json:"ws_url"`
	WSSubprotocol string    `json:"ws_subprotocol,omitempty"`
	LegacyWSURL   string    `json:"legacy_ws_url,omitempty"`
	Ticket        string    `json:"ticket"`
	ExpiresAt     time.Time `json:"expires_at"`
}

VoiceAgentTicket is the minted session envelope returned by POST /v1/voiceagent/sessions. DialVoiceAgent prefers WSSubprotocol so the one-time ticket does not have to ride in the URL.

type VoiceAgentTranscript

// VoiceAgentTranscript is the result type of
// Client.VoiceAgentSessionTranscript. JSON keys are snake_case.
//
// Turns is omitted from the encoding when empty; each element is an untyped
// string-keyed map.
// NOTE(review): the keys of a turn and the relationship between Transcript
// and Turns are not visible in this listing.
type VoiceAgentTranscript struct {
	ID         int64            `json:"id"`
	Transcript string           `json:"transcript"`
	Turns      []map[string]any `json:"turns,omitempty"`
	Language   string           `json:"language"`
	CreatedAt  time.Time        `json:"created_at"`
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL