Documentation ¶
Overview ¶
Package openai provides OpenAI LLM provider integration.
Package openai provides OpenAI Realtime API streaming support.
Index ¶
- Constants
- func ParseServerEvent(data []byte) (interface{}, error)
- func RealtimeStreamingCapabilities() providers.StreamingCapabilities
- type ClientEvent
- type ConversationContent
- type ConversationItem
- type ConversationItemCreateEvent
- type ConversationItemCreatedEvent
- type ConversationItemInputAudioTranscriptionCompletedEvent
- type ConversationItemInputAudioTranscriptionFailedEvent
- type EmbeddingOption
- type EmbeddingProvider
- type ErrorDetail
- type ErrorEvent
- type InputAudioBufferAppendEvent
- type InputAudioBufferClearEvent
- type InputAudioBufferClearedEvent
- type InputAudioBufferCommitEvent
- type InputAudioBufferCommittedEvent
- type InputAudioBufferSpeechStartedEvent
- type InputAudioBufferSpeechStoppedEvent
- type Provider
- func (p *Provider) CalculateCost(tokensIn, tokensOut, cachedTokens int) types.CostInfo
- func (p *Provider) CreateStreamSession(ctx context.Context, req *providers.StreamingInputConfig) (providers.StreamInputSession, error)
- func (p *Provider) GetMultimodalCapabilities() providers.MultimodalCapabilities
- func (p *Provider) GetStreamingCapabilities() providers.StreamingCapabilities
- func (p *Provider) Predict(ctx context.Context, req providers.PredictionRequest) (providers.PredictionResponse, error)
- func (p *Provider) PredictMultimodal(ctx context.Context, req providers.PredictionRequest) (providers.PredictionResponse, error)
- func (p *Provider) PredictMultimodalStream(ctx context.Context, req providers.PredictionRequest) (<-chan providers.StreamChunk, error)
- func (p *Provider) PredictStream(ctx context.Context, req providers.PredictionRequest) (<-chan providers.StreamChunk, error)
- func (p *Provider) SupportsStreamInput() []string
- type RateLimit
- type RateLimitsUpdatedEvent
- type RealtimeSession
- func (s *RealtimeSession) CancelResponse() error
- func (s *RealtimeSession) ClearAudioBuffer() error
- func (s *RealtimeSession) Close() error
- func (s *RealtimeSession) CommitAudioBuffer() error
- func (s *RealtimeSession) Done() <-chan struct{}
- func (s *RealtimeSession) EndInput()
- func (s *RealtimeSession) Error() error
- func (s *RealtimeSession) Response() <-chan providers.StreamChunk
- func (s *RealtimeSession) SendChunk(ctx context.Context, chunk *types.MediaChunk) error
- func (s *RealtimeSession) SendSystemContext(ctx context.Context, text string) error
- func (s *RealtimeSession) SendText(ctx context.Context, text string) error
- func (s *RealtimeSession) SendToolResponse(ctx context.Context, toolCallID, result string) error
- func (s *RealtimeSession) SendToolResponses(ctx context.Context, responses []providers.ToolResponse) error
- func (s *RealtimeSession) TriggerResponse(config *ResponseConfig) error
- type RealtimeSessionConfig
- type RealtimeToolDef
- type RealtimeToolDefinition
- type RealtimeWebSocket
- func (ws *RealtimeWebSocket) Close() error
- func (ws *RealtimeWebSocket) Connect(ctx context.Context) error
- func (ws *RealtimeWebSocket) ConnectWithRetry(ctx context.Context) error
- func (ws *RealtimeWebSocket) IsClosed() bool
- func (ws *RealtimeWebSocket) Receive(ctx context.Context) ([]byte, error)
- func (ws *RealtimeWebSocket) ReceiveLoop(ctx context.Context, msgCh chan<- []byte) error
- func (ws *RealtimeWebSocket) Send(msg interface{}) error
- func (ws *RealtimeWebSocket) StartHeartbeat(ctx context.Context, interval time.Duration)
- type ResponseAudioDeltaEvent
- type ResponseAudioDoneEvent
- type ResponseAudioTranscriptDeltaEvent
- type ResponseAudioTranscriptDoneEvent
- type ResponseCancelEvent
- type ResponseConfig
- type ResponseContentPartAddedEvent
- type ResponseContentPartDoneEvent
- type ResponseCreateEvent
- type ResponseCreatedEvent
- type ResponseDoneEvent
- type ResponseFunctionCallArgumentsDeltaEvent
- type ResponseFunctionCallArgumentsDoneEvent
- type ResponseInfo
- type ResponseOutputItemAddedEvent
- type ResponseOutputItemDoneEvent
- type ResponseTextDeltaEvent
- type ResponseTextDoneEvent
- type ServerEvent
- type SessionConfig
- type SessionCreatedEvent
- type SessionInfo
- type SessionUpdateEvent
- type SessionUpdatedEvent
- type ToolProvider
- func (p *ToolProvider) BuildTooling(descriptors []*providers.ToolDescriptor) (interface{}, error)
- func (p *ToolProvider) PredictMultimodalWithTools(ctx context.Context, req providers.PredictionRequest, tools interface{}, ...) (providers.PredictionResponse, []types.MessageToolCall, error)
- func (p *ToolProvider) PredictStreamWithTools(ctx context.Context, req providers.PredictionRequest, tools interface{}, ...) (<-chan providers.StreamChunk, error)
- func (p *ToolProvider) PredictWithTools(ctx context.Context, req providers.PredictionRequest, tools interface{}, ...) (providers.PredictionResponse, []types.MessageToolCall, error)
- type TranscriptionConfig
- type TurnDetectionConfig
- type UsageInfo
Constants ¶
const (
	// DefaultEmbeddingModel is the default model for embeddings
	DefaultEmbeddingModel = "text-embedding-3-small"
	// EmbeddingModelAda002 is the legacy ada-002 model
	EmbeddingModelAda002 = "text-embedding-ada-002"
	// EmbeddingModel3Small is the newer small model with better performance
	EmbeddingModel3Small = "text-embedding-3-small"
	// EmbeddingModel3Large is the large model with highest quality
	EmbeddingModel3Large = "text-embedding-3-large"
)
Embedding model constants
const (
	// RealtimeAPIEndpoint is the base WebSocket endpoint for OpenAI Realtime API.
	RealtimeAPIEndpoint = "wss://api.openai.com/v1/realtime"
	// RealtimeBetaHeader is required for the Realtime API.
	RealtimeBetaHeader = "realtime=v1"
	// Default audio configuration for OpenAI Realtime API.
	// OpenAI Realtime uses 24kHz 16-bit PCM mono audio.
	DefaultRealtimeSampleRate = 24000
	DefaultRealtimeChannels   = 1
	DefaultRealtimeBitDepth   = 16
)
Realtime API constants
Variables ¶
This section is empty.
Functions ¶
func ParseServerEvent ¶ added in v1.1.6
func ParseServerEvent(data []byte) (interface{}, error)
ParseServerEvent parses a raw JSON message into the appropriate event type.
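A minimal dispatch sketch; it assumes events are returned as pointers and that the raw message came from a RealtimeWebSocket (see below), so the receive call is illustrative only:

raw, err := ws.Receive(ctx)
if err != nil {
	return err
}
evt, err := openai.ParseServerEvent(raw)
if err != nil {
	return err
}
switch e := evt.(type) {
case *openai.ResponseTextDeltaEvent:
	fmt.Print(e.Delta) // incremental text
case *openai.ErrorEvent:
	return fmt.Errorf("realtime error: %s", e.Error.Message)
default:
	// other server events can be logged or ignored
}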
func RealtimeStreamingCapabilities ¶ added in v1.1.6
func RealtimeStreamingCapabilities() providers.StreamingCapabilities
RealtimeStreamingCapabilities returns the streaming capabilities for OpenAI Realtime API.
Types ¶
type ClientEvent ¶ added in v1.1.6
ClientEvent is the base structure for all client events.
type ConversationContent ¶ added in v1.1.6
type ConversationContent struct {
Type string `json:"type"` // "input_text", "input_audio", "text", "audio"
Text string `json:"text,omitempty"`
Audio string `json:"audio,omitempty"` // Base64-encoded
Transcript string `json:"transcript,omitempty"` // For audio content
}
ConversationContent represents content within a conversation item.
type ConversationItem ¶ added in v1.1.6
type ConversationItem struct {
ID string `json:"id,omitempty"`
Type string `json:"type"` // "message", "function_call", "function_call_output"
Status string `json:"status,omitempty"`
Role string `json:"role,omitempty"` // "user", "assistant", "system"
Content []ConversationContent `json:"content,omitempty"`
CallID string `json:"call_id,omitempty"` // For function_call_output
Output string `json:"output,omitempty"` // For function_call_output
Name string `json:"name,omitempty"` // For function_call
Arguments string `json:"arguments,omitempty"` // For function_call
}
ConversationItem represents an item in the conversation.
type ConversationItemCreateEvent ¶ added in v1.1.6
type ConversationItemCreateEvent struct {
ClientEvent
PreviousItemID string `json:"previous_item_id,omitempty"`
Item ConversationItem `json:"item"`
}
ConversationItemCreateEvent adds an item to the conversation.
type ConversationItemCreatedEvent ¶ added in v1.1.6
type ConversationItemCreatedEvent struct {
ServerEvent
PreviousItemID string `json:"previous_item_id"`
Item ConversationItem `json:"item"`
}
ConversationItemCreatedEvent confirms an item was added.
type ConversationItemInputAudioTranscriptionCompletedEvent ¶ added in v1.1.6
type ConversationItemInputAudioTranscriptionCompletedEvent struct {
ServerEvent
ItemID string `json:"item_id"`
ContentIndex int `json:"content_index"`
Transcript string `json:"transcript"`
}
ConversationItemInputAudioTranscriptionCompletedEvent provides transcription.
type ConversationItemInputAudioTranscriptionFailedEvent ¶ added in v1.1.6
type ConversationItemInputAudioTranscriptionFailedEvent struct {
ServerEvent
ItemID string `json:"item_id"`
ContentIndex int `json:"content_index"`
Error ErrorDetail `json:"error"`
}
ConversationItemInputAudioTranscriptionFailedEvent indicates transcription failed.
type EmbeddingOption ¶ added in v1.1.6
type EmbeddingOption func(*EmbeddingProvider)
EmbeddingOption configures the EmbeddingProvider.
func WithEmbeddingAPIKey ¶ added in v1.1.6
func WithEmbeddingAPIKey(key string) EmbeddingOption
WithEmbeddingAPIKey sets the API key explicitly.
func WithEmbeddingBaseURL ¶ added in v1.1.6
func WithEmbeddingBaseURL(url string) EmbeddingOption
WithEmbeddingBaseURL sets a custom base URL (for Azure or proxies).
func WithEmbeddingHTTPClient ¶ added in v1.1.6
func WithEmbeddingHTTPClient(client *http.Client) EmbeddingOption
WithEmbeddingHTTPClient sets a custom HTTP client.
func WithEmbeddingModel ¶ added in v1.1.6
func WithEmbeddingModel(model string) EmbeddingOption
WithEmbeddingModel sets the embedding model.
type EmbeddingProvider ¶ added in v1.1.6
type EmbeddingProvider struct {
*providers.BaseEmbeddingProvider
}
EmbeddingProvider implements embedding generation via OpenAI API.
func NewEmbeddingProvider ¶ added in v1.1.6
func NewEmbeddingProvider(opts ...EmbeddingOption) (*EmbeddingProvider, error)
NewEmbeddingProvider creates an OpenAI embedding provider.
func (*EmbeddingProvider) Embed ¶ added in v1.1.6
func (p *EmbeddingProvider) Embed(
	ctx context.Context,
	req providers.EmbeddingRequest,
) (providers.EmbeddingResponse, error)
Embed generates embeddings for the given texts.
func (*EmbeddingProvider) EstimateCost ¶ added in v1.1.6
func (p *EmbeddingProvider) EstimateCost(tokens int) float64
EstimateCost estimates the cost for embedding the given number of tokens.
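A short construction sketch; the option values and the token count passed to EstimateCost are placeholders, and the Embed call is omitted because the EmbeddingRequest fields live in the providers package:

p, err := openai.NewEmbeddingProvider(
	openai.WithEmbeddingModel(openai.EmbeddingModel3Large),
	openai.WithEmbeddingAPIKey(os.Getenv("OPENAI_API_KEY")),
)
if err != nil {
	log.Fatal(err)
}
// Rough cost estimate for ~10,000 input tokens.
fmt.Printf("estimated cost: $%.6f\n", p.EstimateCost(10000))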
type ErrorDetail ¶ added in v1.1.6
type ErrorDetail struct {
Type string `json:"type"`
Code string `json:"code"`
Message string `json:"message"`
Param string `json:"param,omitempty"`
EventID string `json:"event_id,omitempty"`
}
ErrorDetail contains error information.
type ErrorEvent ¶ added in v1.1.6
type ErrorEvent struct {
ServerEvent
Error ErrorDetail `json:"error"`
}
ErrorEvent indicates an error occurred.
type InputAudioBufferAppendEvent ¶ added in v1.1.6
type InputAudioBufferAppendEvent struct {
ClientEvent
Audio string `json:"audio"` // Base64-encoded audio data
}
InputAudioBufferAppendEvent appends audio to the input buffer.
type InputAudioBufferClearEvent ¶ added in v1.1.6
type InputAudioBufferClearEvent struct {
ClientEvent
}
InputAudioBufferClearEvent clears the audio buffer.
type InputAudioBufferClearedEvent ¶ added in v1.1.6
type InputAudioBufferClearedEvent struct {
ServerEvent
}
InputAudioBufferClearedEvent confirms audio buffer was cleared.
type InputAudioBufferCommitEvent ¶ added in v1.1.6
type InputAudioBufferCommitEvent struct {
ClientEvent
}
InputAudioBufferCommitEvent commits the audio buffer for processing.
type InputAudioBufferCommittedEvent ¶ added in v1.1.6
type InputAudioBufferCommittedEvent struct {
ServerEvent
PreviousItemID string `json:"previous_item_id"`
ItemID string `json:"item_id"`
}
InputAudioBufferCommittedEvent confirms audio buffer was committed.
type InputAudioBufferSpeechStartedEvent ¶ added in v1.1.6
type InputAudioBufferSpeechStartedEvent struct {
ServerEvent
AudioStartMs int `json:"audio_start_ms"`
ItemID string `json:"item_id"`
}
InputAudioBufferSpeechStartedEvent indicates speech was detected.
type InputAudioBufferSpeechStoppedEvent ¶ added in v1.1.6
type InputAudioBufferSpeechStoppedEvent struct {
ServerEvent
AudioEndMs int `json:"audio_end_ms"`
ItemID string `json:"item_id"`
}
InputAudioBufferSpeechStoppedEvent indicates speech ended.
type Provider ¶ added in v1.1.3
type Provider struct {
providers.BaseProvider
// contains filtered or unexported fields
}
Provider implements the provider interface for OpenAI.
func NewProvider ¶ added in v1.1.3
func NewProvider(id, model, baseURL string, defaults providers.ProviderDefaults, includeRawOutput bool) *Provider
NewProvider creates a new OpenAI provider
func (*Provider) CalculateCost ¶ added in v1.1.3
func (p *Provider) CalculateCost(tokensIn, tokensOut, cachedTokens int) types.CostInfo
CalculateCost calculates a detailed cost breakdown, including optional cached tokens.
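For example, with hypothetical token counts (1,200 input, 350 output, none cached) and p a *Provider from NewProvider:

cost := p.CalculateCost(1200, 350, 0)
_ = cost // types.CostInfo with the per-direction breakdown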
func (*Provider) CreateStreamSession ¶ added in v1.1.6
func (p *Provider) CreateStreamSession(
	ctx context.Context,
	req *providers.StreamingInputConfig,
) (providers.StreamInputSession, error)
CreateStreamSession creates a new bidirectional streaming session with OpenAI Realtime API.
The session supports real-time audio input/output with the following features:
- Bidirectional audio streaming (send and receive audio simultaneously)
- Server-side voice activity detection (VAD) for automatic turn detection
- Function/tool calling during the streaming session
- Input and output audio transcription
Audio Format: OpenAI Realtime API uses 24kHz 16-bit PCM mono audio by default. The session automatically handles base64 encoding/decoding of audio data.
Example usage:
session, err := provider.CreateStreamSession(ctx, &providers.StreamingInputConfig{
Config: types.StreamingMediaConfig{
Type: types.ContentTypeAudio,
SampleRate: 24000,
Encoding: "pcm16",
Channels: 1,
},
SystemInstruction: "You are a helpful assistant.",
})
func (*Provider) GetMultimodalCapabilities ¶ added in v1.1.3
func (p *Provider) GetMultimodalCapabilities() providers.MultimodalCapabilities
GetMultimodalCapabilities returns OpenAI's multimodal capabilities
func (*Provider) GetStreamingCapabilities ¶ added in v1.1.6
func (p *Provider) GetStreamingCapabilities() providers.StreamingCapabilities
GetStreamingCapabilities returns detailed information about OpenAI's streaming support.
func (*Provider) Predict ¶ added in v1.1.3
func (p *Provider) Predict(ctx context.Context, req providers.PredictionRequest) (providers.PredictionResponse, error)
Predict sends a predict request to OpenAI
func (*Provider) PredictMultimodal ¶ added in v1.1.3
func (p *Provider) PredictMultimodal(ctx context.Context, req providers.PredictionRequest) (providers.PredictionResponse, error)
PredictMultimodal performs a predict request with multimodal content
func (*Provider) PredictMultimodalStream ¶ added in v1.1.3
func (p *Provider) PredictMultimodalStream(ctx context.Context, req providers.PredictionRequest) (<-chan providers.StreamChunk, error)
PredictMultimodalStream performs a streaming predict request with multimodal content
func (*Provider) PredictStream ¶ added in v1.1.3
func (p *Provider) PredictStream(ctx context.Context, req providers.PredictionRequest) (<-chan providers.StreamChunk, error)
PredictStream streams a predict response from OpenAI
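A consumption sketch; the request construction is omitted here and the chunk fields are defined in the providers package:

stream, err := p.PredictStream(ctx, req)
if err != nil {
	return err
}
for chunk := range stream {
	// Each StreamChunk carries an incremental piece of the response
	// (text delta, usage, or error); handle it as it arrives.
	_ = chunk
}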
func (*Provider) SupportsStreamInput ¶ added in v1.1.6
func (p *Provider) SupportsStreamInput() []string
SupportsStreamInput returns the media types supported for streaming input.
type RateLimit ¶ added in v1.1.6
type RateLimit struct {
Name string `json:"name"`
Limit int `json:"limit"`
Remaining int `json:"remaining"`
ResetSeconds float64 `json:"reset_seconds"`
}
RateLimit contains rate limit details.
type RateLimitsUpdatedEvent ¶ added in v1.1.6
type RateLimitsUpdatedEvent struct {
ServerEvent
RateLimits []RateLimit `json:"rate_limits"`
}
RateLimitsUpdatedEvent provides rate limit information.
type RealtimeSession ¶ added in v1.1.6
type RealtimeSession struct {
// contains filtered or unexported fields
}
RealtimeSession implements StreamInputSession for OpenAI Realtime API.
func NewRealtimeSession ¶ added in v1.1.6
func NewRealtimeSession(ctx context.Context, apiKey string, config *RealtimeSessionConfig) (*RealtimeSession, error)
NewRealtimeSession creates a new OpenAI Realtime streaming session.
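A minimal sketch; the API key source and instructions are placeholders:

cfg := openai.DefaultRealtimeSessionConfig()
cfg.Instructions = "You are a helpful assistant."
session, err := openai.NewRealtimeSession(ctx, os.Getenv("OPENAI_API_KEY"), &cfg)
if err != nil {
	return err
}
defer session.Close()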
func (*RealtimeSession) CancelResponse ¶ added in v1.1.6
func (s *RealtimeSession) CancelResponse() error
CancelResponse cancels an in-progress response.
func (*RealtimeSession) ClearAudioBuffer ¶ added in v1.1.6
func (s *RealtimeSession) ClearAudioBuffer() error
ClearAudioBuffer clears the current audio buffer.
func (*RealtimeSession) Close ¶ added in v1.1.6
func (s *RealtimeSession) Close() error
Close closes the session.
func (*RealtimeSession) CommitAudioBuffer ¶ added in v1.1.6
func (s *RealtimeSession) CommitAudioBuffer() error
CommitAudioBuffer commits the current audio buffer for processing.
func (*RealtimeSession) Done ¶ added in v1.1.6
func (s *RealtimeSession) Done() <-chan struct{}
Done returns a channel that's closed when the session ends.
func (*RealtimeSession) EndInput ¶ added in v1.1.6
func (s *RealtimeSession) EndInput()
EndInput signals the end of user input. For OpenAI Realtime with server VAD, this commits the audio buffer. For manual turn control, this commits and triggers a response.
func (*RealtimeSession) Error ¶ added in v1.1.6
func (s *RealtimeSession) Error() error
Error returns any error that occurred during the session.
func (*RealtimeSession) Response ¶ added in v1.1.6
func (s *RealtimeSession) Response() <-chan providers.StreamChunk
Response returns the channel for receiving responses.
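A typical read loop drains the response channel until the session ends, for example:

for {
	select {
	case chunk, ok := <-session.Response():
		if !ok {
			return session.Error()
		}
		_ = chunk // text deltas, audio deltas, tool calls, ...
	case <-session.Done():
		return session.Error()
	}
}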
func (*RealtimeSession) SendChunk ¶ added in v1.1.6
func (s *RealtimeSession) SendChunk(ctx context.Context, chunk *types.MediaChunk) error
SendChunk sends an audio chunk to the server.
func (*RealtimeSession) SendSystemContext ¶ added in v1.1.6
func (s *RealtimeSession) SendSystemContext(ctx context.Context, text string) error
SendSystemContext sends a text message as context without completing the turn.
func (*RealtimeSession) SendText ¶ added in v1.1.6
func (s *RealtimeSession) SendText(ctx context.Context, text string) error
SendText sends a text message and triggers a response.
func (*RealtimeSession) SendToolResponse ¶ added in v1.1.6
func (s *RealtimeSession) SendToolResponse(ctx context.Context, toolCallID, result string) error
SendToolResponse sends the result of a tool execution back to the model.
func (*RealtimeSession) SendToolResponses ¶ added in v1.1.6
func (s *RealtimeSession) SendToolResponses(ctx context.Context, responses []providers.ToolResponse) error
SendToolResponses sends multiple tool results at once (for parallel tool calls).
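A sketch of returning a single tool result; callID comes from the function-call events surfaced on the response channel, and the JSON payload is a placeholder:

result := `{"temperature_c": 21}`
if err := session.SendToolResponse(ctx, callID, result); err != nil {
	return err
}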
func (*RealtimeSession) TriggerResponse ¶ added in v1.1.6
func (s *RealtimeSession) TriggerResponse(config *ResponseConfig) error
TriggerResponse manually triggers a response from the model.
type RealtimeSessionConfig ¶ added in v1.1.6
type RealtimeSessionConfig struct {
// Model specifies the model to use (e.g., "gpt-4o-realtime-preview").
Model string
// Modalities specifies the input/output modalities.
// Valid values: "text", "audio"
// Default: ["text", "audio"]
Modalities []string
// Instructions is the system prompt for the session.
Instructions string
// Voice selects the voice for audio output.
// Options: "alloy", "echo", "fable", "onyx", "nova", "shimmer"
// Default: "alloy"
Voice string
// InputAudioFormat specifies the format for input audio.
// Options: "pcm16", "g711_ulaw", "g711_alaw"
// Default: "pcm16"
InputAudioFormat string
// OutputAudioFormat specifies the format for output audio.
// Options: "pcm16", "g711_ulaw", "g711_alaw"
// Default: "pcm16"
OutputAudioFormat string
// InputAudioTranscription configures transcription of input audio.
// If nil, input transcription is disabled.
InputAudioTranscription *TranscriptionConfig
// TurnDetection configures server-side voice activity detection.
// If nil, VAD is disabled and turn management is manual.
TurnDetection *TurnDetectionConfig
// Tools defines available functions for the session.
Tools []RealtimeToolDefinition
// Temperature controls randomness (0.6-1.2, default 0.8).
Temperature float64
// MaxResponseOutputTokens limits response length.
// Use "inf" for unlimited, or a specific number.
MaxResponseOutputTokens interface{}
}
RealtimeSessionConfig configures a new OpenAI Realtime streaming session.
func DefaultRealtimeSessionConfig ¶ added in v1.1.6
func DefaultRealtimeSessionConfig() RealtimeSessionConfig
DefaultRealtimeSessionConfig returns sensible defaults for a Realtime session.
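A hand-built configuration sketch using the documented options and defaults; the model name and instructions are placeholders:

cfg := openai.RealtimeSessionConfig{
	Model:             "gpt-4o-realtime-preview",
	Modalities:        []string{"text", "audio"},
	Instructions:      "Answer briefly.",
	Voice:             "alloy",
	InputAudioFormat:  "pcm16",
	OutputAudioFormat: "pcm16",
	TurnDetection: &openai.TurnDetectionConfig{
		Type:              "server_vad",
		Threshold:         0.5,
		PrefixPaddingMs:   300,
		SilenceDurationMs: 500,
		CreateResponse:    true,
	},
	Temperature: 0.8,
}
_ = cfg // pass &cfg to NewRealtimeSession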
type RealtimeToolDef ¶ added in v1.1.6
type RealtimeToolDef struct {
Type string `json:"type"`
Name string `json:"name"`
Description string `json:"description,omitempty"`
Parameters map[string]interface{} `json:"parameters,omitempty"`
}
RealtimeToolDef is the tool definition format for session config.
type RealtimeToolDefinition ¶ added in v1.1.6
type RealtimeToolDefinition struct {
// Type is always "function" for function tools.
Type string `json:"type"`
// Name is the function name.
Name string `json:"name"`
// Description explains what the function does.
Description string `json:"description,omitempty"`
// Parameters is the JSON Schema for function parameters.
Parameters map[string]interface{} `json:"parameters,omitempty"`
}
RealtimeToolDefinition defines a function available in the session.
type RealtimeWebSocket ¶ added in v1.1.6
type RealtimeWebSocket struct {
// contains filtered or unexported fields
}
RealtimeWebSocket manages WebSocket connections for OpenAI Realtime API.
func NewRealtimeWebSocket ¶ added in v1.1.6
func NewRealtimeWebSocket(model, apiKey string) *RealtimeWebSocket
NewRealtimeWebSocket creates a new WebSocket manager for OpenAI Realtime API.
func (*RealtimeWebSocket) Close ¶ added in v1.1.6
func (ws *RealtimeWebSocket) Close() error
Close closes the WebSocket connection gracefully.
func (*RealtimeWebSocket) Connect ¶ added in v1.1.6
func (ws *RealtimeWebSocket) Connect(ctx context.Context) error
Connect establishes a WebSocket connection to the OpenAI Realtime API.
func (*RealtimeWebSocket) ConnectWithRetry ¶ added in v1.1.6
func (ws *RealtimeWebSocket) ConnectWithRetry(ctx context.Context) error
ConnectWithRetry attempts to connect with exponential backoff.
func (*RealtimeWebSocket) IsClosed ¶ added in v1.1.6
func (ws *RealtimeWebSocket) IsClosed() bool
IsClosed returns whether the WebSocket is closed.
func (*RealtimeWebSocket) Receive ¶ added in v1.1.6
func (ws *RealtimeWebSocket) Receive(ctx context.Context) ([]byte, error)
Receive reads a message from the WebSocket with context support.
func (*RealtimeWebSocket) ReceiveLoop ¶ added in v1.1.6
func (ws *RealtimeWebSocket) ReceiveLoop(ctx context.Context, msgCh chan<- []byte) error
ReceiveLoop continuously reads messages and sends them to the provided channel. It returns when the connection is closed or an error occurs.
func (*RealtimeWebSocket) Send ¶ added in v1.1.6
func (ws *RealtimeWebSocket) Send(msg interface{}) error
Send sends a message to the WebSocket.
func (*RealtimeWebSocket) StartHeartbeat ¶ added in v1.1.6
func (ws *RealtimeWebSocket) StartHeartbeat(ctx context.Context, interval time.Duration)
StartHeartbeat starts a goroutine that sends ping messages periodically.
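A low-level usage sketch; the model name, heartbeat interval, and channel buffer size are arbitrary choices, and real code would dispatch on the parsed event types:

ws := openai.NewRealtimeWebSocket("gpt-4o-realtime-preview", apiKey)
if err := ws.ConnectWithRetry(ctx); err != nil {
	return err
}
defer ws.Close()
ws.StartHeartbeat(ctx, 30*time.Second)

msgCh := make(chan []byte, 16)
go func() {
	defer close(msgCh)
	_ = ws.ReceiveLoop(ctx, msgCh)
}()
for raw := range msgCh {
	evt, err := openai.ParseServerEvent(raw)
	if err != nil {
		continue
	}
	_ = evt // handle server events
}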
type ResponseAudioDeltaEvent ¶ added in v1.1.6
type ResponseAudioDeltaEvent struct {
ServerEvent
ResponseID string `json:"response_id"`
ItemID string `json:"item_id"`
OutputIndex int `json:"output_index"`
ContentIndex int `json:"content_index"`
Delta string `json:"delta"` // Base64-encoded audio
}
ResponseAudioDeltaEvent provides streaming audio.
type ResponseAudioDoneEvent ¶ added in v1.1.6
type ResponseAudioDoneEvent struct {
ServerEvent
ResponseID string `json:"response_id"`
ItemID string `json:"item_id"`
OutputIndex int `json:"output_index"`
ContentIndex int `json:"content_index"`
}
ResponseAudioDoneEvent indicates audio streaming completed.
type ResponseAudioTranscriptDeltaEvent ¶ added in v1.1.6
type ResponseAudioTranscriptDeltaEvent struct {
ServerEvent
ResponseID string `json:"response_id"`
ItemID string `json:"item_id"`
OutputIndex int `json:"output_index"`
ContentIndex int `json:"content_index"`
Delta string `json:"delta"`
}
ResponseAudioTranscriptDeltaEvent provides streaming transcript.
type ResponseAudioTranscriptDoneEvent ¶ added in v1.1.6
type ResponseAudioTranscriptDoneEvent struct {
ServerEvent
ResponseID string `json:"response_id"`
ItemID string `json:"item_id"`
OutputIndex int `json:"output_index"`
ContentIndex int `json:"content_index"`
Transcript string `json:"transcript"`
}
ResponseAudioTranscriptDoneEvent indicates transcript completed.
type ResponseCancelEvent ¶ added in v1.1.6
type ResponseCancelEvent struct {
ClientEvent
}
ResponseCancelEvent cancels an in-progress response.
type ResponseConfig ¶ added in v1.1.6
type ResponseConfig struct {
Modalities []string `json:"modalities,omitempty"`
Instructions string `json:"instructions,omitempty"`
Voice string `json:"voice,omitempty"`
OutputAudioFormat string `json:"output_audio_format,omitempty"`
Tools []RealtimeToolDef `json:"tools,omitempty"`
ToolChoice interface{} `json:"tool_choice,omitempty"`
Temperature float64 `json:"temperature,omitempty"`
MaxOutputTokens interface{} `json:"max_output_tokens,omitempty"`
}
ResponseConfig configures a response.
type ResponseContentPartAddedEvent ¶ added in v1.1.6
type ResponseContentPartAddedEvent struct {
ServerEvent
ResponseID string `json:"response_id"`
ItemID string `json:"item_id"`
OutputIndex int `json:"output_index"`
ContentIndex int `json:"content_index"`
Part ConversationContent `json:"part"`
}
ResponseContentPartAddedEvent indicates content was added.
type ResponseContentPartDoneEvent ¶ added in v1.1.6
type ResponseContentPartDoneEvent struct {
ServerEvent
ResponseID string `json:"response_id"`
ItemID string `json:"item_id"`
OutputIndex int `json:"output_index"`
ContentIndex int `json:"content_index"`
Part ConversationContent `json:"part"`
}
ResponseContentPartDoneEvent indicates content part completed.
type ResponseCreateEvent ¶ added in v1.1.6
type ResponseCreateEvent struct {
ClientEvent
Response *ResponseConfig `json:"response,omitempty"`
}
ResponseCreateEvent triggers a response from the model.
type ResponseCreatedEvent ¶ added in v1.1.6
type ResponseCreatedEvent struct {
ServerEvent
Response ResponseInfo `json:"response"`
}
ResponseCreatedEvent indicates a response is starting.
type ResponseDoneEvent ¶ added in v1.1.6
type ResponseDoneEvent struct {
ServerEvent
Response ResponseInfo `json:"response"`
}
ResponseDoneEvent indicates a response completed.
type ResponseFunctionCallArgumentsDeltaEvent ¶ added in v1.1.6
type ResponseFunctionCallArgumentsDeltaEvent struct {
ServerEvent
ResponseID string `json:"response_id"`
ItemID string `json:"item_id"`
OutputIndex int `json:"output_index"`
CallID string `json:"call_id"`
Delta string `json:"delta"`
}
ResponseFunctionCallArgumentsDeltaEvent provides streaming function args.
type ResponseFunctionCallArgumentsDoneEvent ¶ added in v1.1.6
type ResponseFunctionCallArgumentsDoneEvent struct {
ServerEvent
ResponseID string `json:"response_id"`
ItemID string `json:"item_id"`
OutputIndex int `json:"output_index"`
CallID string `json:"call_id"`
Name string `json:"name"`
Arguments string `json:"arguments"`
}
ResponseFunctionCallArgumentsDoneEvent indicates function args completed.
type ResponseInfo ¶ added in v1.1.6
type ResponseInfo struct {
ID string `json:"id"`
Object string `json:"object"`
Status string `json:"status"`
StatusDetails interface{} `json:"status_details"`
Output []ConversationItem `json:"output"`
Usage *UsageInfo `json:"usage"`
}
ResponseInfo contains response details.
type ResponseOutputItemAddedEvent ¶ added in v1.1.6
type ResponseOutputItemAddedEvent struct {
ServerEvent
ResponseID string `json:"response_id"`
OutputIndex int `json:"output_index"`
Item ConversationItem `json:"item"`
}
ResponseOutputItemAddedEvent indicates an output item was added.
type ResponseOutputItemDoneEvent ¶ added in v1.1.6
type ResponseOutputItemDoneEvent struct {
ServerEvent
ResponseID string `json:"response_id"`
OutputIndex int `json:"output_index"`
Item ConversationItem `json:"item"`
}
ResponseOutputItemDoneEvent indicates an output item completed.
type ResponseTextDeltaEvent ¶ added in v1.1.6
type ResponseTextDeltaEvent struct {
ServerEvent
ResponseID string `json:"response_id"`
ItemID string `json:"item_id"`
OutputIndex int `json:"output_index"`
ContentIndex int `json:"content_index"`
Delta string `json:"delta"`
}
ResponseTextDeltaEvent provides streaming text.
type ResponseTextDoneEvent ¶ added in v1.1.6
type ResponseTextDoneEvent struct {
ServerEvent
ResponseID string `json:"response_id"`
ItemID string `json:"item_id"`
OutputIndex int `json:"output_index"`
ContentIndex int `json:"content_index"`
Text string `json:"text"`
}
ResponseTextDoneEvent indicates text streaming completed.
type ServerEvent ¶ added in v1.1.6
ServerEvent is the base structure for all server events.
type SessionConfig ¶ added in v1.1.6
type SessionConfig struct {
Modalities []string `json:"modalities,omitempty"`
Instructions string `json:"instructions,omitempty"`
Voice string `json:"voice,omitempty"`
InputAudioFormat string `json:"input_audio_format,omitempty"`
OutputAudioFormat string `json:"output_audio_format,omitempty"`
InputAudioTranscription *TranscriptionConfig `json:"input_audio_transcription,omitempty"`
TurnDetection *TurnDetectionConfig `json:"turn_detection"` // No omitempty - null disables VAD
Tools []RealtimeToolDef `json:"tools,omitempty"`
ToolChoice interface{} `json:"tool_choice,omitempty"`
Temperature float64 `json:"temperature,omitempty"`
MaxResponseOutputTokens interface{} `json:"max_response_output_tokens,omitempty"`
}
SessionConfig is the session configuration sent in session.update. Note: TurnDetection uses a pointer without omitempty so we can explicitly send null to disable VAD. Omitting it causes OpenAI to use default (server_vad).
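To illustrate the null behavior, marshaling a config with a nil TurnDetection emits an explicit null (a standalone sketch using encoding/json):

cfg := openai.SessionConfig{
	Modalities:    []string{"text", "audio"},
	TurnDetection: nil, // no omitempty: encodes as "turn_detection": null, disabling server VAD
}
b, _ := json.Marshal(cfg)
fmt.Println(string(b)) // ... "turn_detection":null ...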
type SessionCreatedEvent ¶ added in v1.1.6
type SessionCreatedEvent struct {
ServerEvent
Session SessionInfo `json:"session"`
}
SessionCreatedEvent is sent when the session is established.
type SessionInfo ¶ added in v1.1.6
type SessionInfo struct {
ID string `json:"id"`
Object string `json:"object"`
Model string `json:"model"`
Modalities []string `json:"modalities"`
Instructions string `json:"instructions"`
Voice string `json:"voice"`
InputAudioFormat string `json:"input_audio_format"`
OutputAudioFormat string `json:"output_audio_format"`
InputAudioTranscription *TranscriptionConfig `json:"input_audio_transcription"`
TurnDetection *TurnDetectionConfig `json:"turn_detection"`
Tools []RealtimeToolDef `json:"tools"`
Temperature float64 `json:"temperature"`
MaxResponseOutputTokens interface{} `json:"max_response_output_tokens"`
}
SessionInfo contains session details.
type SessionUpdateEvent ¶ added in v1.1.6
type SessionUpdateEvent struct {
ClientEvent
Session SessionConfig `json:"session"`
}
SessionUpdateEvent updates session configuration.
type SessionUpdatedEvent ¶ added in v1.1.6
type SessionUpdatedEvent struct {
ServerEvent
Session SessionInfo `json:"session"`
}
SessionUpdatedEvent confirms a session update.
type ToolProvider ¶ added in v1.1.3
type ToolProvider struct {
*Provider
}
ToolProvider extends Provider with tool support.
func NewToolProvider ¶ added in v1.1.3
func NewToolProvider(id, model, baseURL string, defaults providers.ProviderDefaults, includeRawOutput bool, additionalConfig map[string]interface{}) *ToolProvider
NewToolProvider creates a new OpenAI provider with tool support
func (*ToolProvider) BuildTooling ¶ added in v1.1.3
func (p *ToolProvider) BuildTooling(descriptors []*providers.ToolDescriptor) (interface{}, error)
BuildTooling converts tool descriptors to OpenAI format
func (*ToolProvider) PredictMultimodalWithTools ¶ added in v1.1.3
func (p *ToolProvider) PredictMultimodalWithTools(ctx context.Context, req providers.PredictionRequest, tools interface{}, toolChoice string) (providers.PredictionResponse, []types.MessageToolCall, error)
PredictMultimodalWithTools implements the providers.MultimodalToolSupport interface for ToolProvider. This allows combining multimodal content (images) with tool calls in a single request.
func (*ToolProvider) PredictStreamWithTools ¶ added in v1.1.5
func (p *ToolProvider) PredictStreamWithTools(
	ctx context.Context,
	req providers.PredictionRequest,
	tools interface{},
	toolChoice string,
) (<-chan providers.StreamChunk, error)
PredictStreamWithTools performs a streaming predict request with tool support
func (*ToolProvider) PredictWithTools ¶ added in v1.1.3
func (p *ToolProvider) PredictWithTools(ctx context.Context, req providers.PredictionRequest, tools interface{}, toolChoice string) (providers.PredictionResponse, []types.MessageToolCall, error)
PredictWithTools performs a prediction request with tool support
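An end-to-end sketch; defaults, descriptors, and req are assumed to be prepared elsewhere, the model name is a placeholder, and passing "auto" as the tool choice is an assumption based on OpenAI's chat API conventions:

tp := openai.NewToolProvider("openai", "gpt-4o", "", defaults, false, nil)

tooling, err := tp.BuildTooling(descriptors)
if err != nil {
	return err
}

resp, toolCalls, err := tp.PredictWithTools(ctx, req, tooling, "auto")
if err != nil {
	return err
}
for _, call := range toolCalls {
	_ = call // execute the requested tool, then send results in a follow-up request
}
_ = resp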
type TranscriptionConfig ¶ added in v1.1.6
type TranscriptionConfig struct {
// Model specifies the transcription model.
// Default: "whisper-1"
Model string `json:"model,omitempty"`
}
TranscriptionConfig configures audio transcription.
type TurnDetectionConfig ¶ added in v1.1.6
type TurnDetectionConfig struct {
// Type specifies the VAD type.
// Options: "server_vad", "semantic_vad"
Type string `json:"type"`
// Threshold is the activation threshold (0.0-1.0).
// Default: 0.5
Threshold float64 `json:"threshold,omitempty"`
// PrefixPaddingMs is audio padding before speech in milliseconds.
// Default: 300
PrefixPaddingMs int `json:"prefix_padding_ms,omitempty"`
// SilenceDurationMs is silence duration to detect end of speech.
// Default: 500
SilenceDurationMs int `json:"silence_duration_ms,omitempty"`
// CreateResponse determines if a response is automatically created
// when speech ends. Default: true
CreateResponse bool `json:"create_response,omitempty"`
}
TurnDetectionConfig configures server-side VAD.
type UsageInfo ¶ added in v1.1.6
type UsageInfo struct {
TotalTokens int `json:"total_tokens"`
InputTokens int `json:"input_tokens"`
OutputTokens int `json:"output_tokens"`
InputTokenDetails struct {
CachedTokens int `json:"cached_tokens"`
TextTokens int `json:"text_tokens"`
AudioTokens int `json:"audio_tokens"`
} `json:"input_token_details"`
OutputTokenDetails struct {
TextTokens int `json:"text_tokens"`
AudioTokens int `json:"audio_tokens"`
} `json:"output_token_details"`
}
UsageInfo contains token usage information.