openai

package
v1.1.6 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 23, 2025 License: Apache-2.0 Imports: 16 Imported by: 0

Documentation

Overview

Package openai provides OpenAI LLM provider integration, including OpenAI Realtime API streaming support.

Index

Constants

View Source
const (
	// DefaultEmbeddingModel is the default model for embeddings.
	// It aliases EmbeddingModel3Small so the two values cannot drift apart.
	DefaultEmbeddingModel = EmbeddingModel3Small

	// EmbeddingModelAda002 is the legacy ada-002 model.
	EmbeddingModelAda002 = "text-embedding-ada-002"

	// EmbeddingModel3Small is the newer small model with better performance.
	EmbeddingModel3Small = "text-embedding-3-small"

	// EmbeddingModel3Large is the large model with highest quality.
	EmbeddingModel3Large = "text-embedding-3-large"
)

Embedding model constants

View Source
const (
	// RealtimeAPIEndpoint is the base WebSocket endpoint for OpenAI Realtime API.
	RealtimeAPIEndpoint = "wss://api.openai.com/v1/realtime"

	// RealtimeBetaHeader is required for the Realtime API.
	// NOTE(review): presumably this is the value of a beta opt-in request
	// header sent when connecting — confirm against the connection code.
	RealtimeBetaHeader = "realtime=v1"

	// Default audio configuration for OpenAI Realtime API.
	// OpenAI Realtime uses 24kHz 16-bit PCM mono audio.
	DefaultRealtimeSampleRate = 24000 // sample rate in Hz (24kHz)
	DefaultRealtimeChannels   = 1 // channel count (mono)
	DefaultRealtimeBitDepth   = 16 // bits per sample (16-bit PCM)
)

Realtime API constants

Variables

This section is empty.

Functions

func ParseServerEvent added in v1.1.6

func ParseServerEvent(data []byte) (interface{}, error)

ParseServerEvent parses a raw JSON message into the appropriate event type.

func RealtimeStreamingCapabilities added in v1.1.6

func RealtimeStreamingCapabilities() providers.StreamingCapabilities

RealtimeStreamingCapabilities returns the streaming capabilities for OpenAI Realtime API.

Types

type ClientEvent added in v1.1.6

// ClientEvent is the base structure for all client events.
// It is embedded by the concrete client event types in this package
// (for example SessionUpdateEvent and ResponseCreateEvent).
type ClientEvent struct {
	// EventID is left out of the serialized JSON when empty (omitempty).
	EventID string `json:"event_id,omitempty"`
	// Type is always serialized under the "type" key.
	Type    string `json:"type"`
}

ClientEvent is the base structure for all client events.

type ConversationContent added in v1.1.6

type ConversationContent struct {
	Type       string `json:"type"` // "input_text", "input_audio", "text", "audio"
	Text       string `json:"text,omitempty"`
	Audio      string `json:"audio,omitempty"`      // Base64-encoded
	Transcript string `json:"transcript,omitempty"` // For audio content
}

ConversationContent represents content within a conversation item.

type ConversationItem added in v1.1.6

type ConversationItem struct {
	ID        string                `json:"id,omitempty"`
	Type      string                `json:"type"` // "message", "function_call", "function_call_output"
	Status    string                `json:"status,omitempty"`
	Role      string                `json:"role,omitempty"` // "user", "assistant", "system"
	Content   []ConversationContent `json:"content,omitempty"`
	CallID    string                `json:"call_id,omitempty"`   // For function_call_output
	Output    string                `json:"output,omitempty"`    // For function_call_output
	Name      string                `json:"name,omitempty"`      // For function_call
	Arguments string                `json:"arguments,omitempty"` // For function_call
}

ConversationItem represents an item in the conversation.

type ConversationItemCreateEvent added in v1.1.6

type ConversationItemCreateEvent struct {
	ClientEvent
	PreviousItemID string           `json:"previous_item_id,omitempty"`
	Item           ConversationItem `json:"item"`
}

ConversationItemCreateEvent adds an item to the conversation.

type ConversationItemCreatedEvent added in v1.1.6

type ConversationItemCreatedEvent struct {
	ServerEvent
	PreviousItemID string           `json:"previous_item_id"`
	Item           ConversationItem `json:"item"`
}

ConversationItemCreatedEvent confirms an item was added.

type ConversationItemInputAudioTranscriptionCompletedEvent added in v1.1.6

type ConversationItemInputAudioTranscriptionCompletedEvent struct {
	ServerEvent
	ItemID       string `json:"item_id"`
	ContentIndex int    `json:"content_index"`
	Transcript   string `json:"transcript"`
}

ConversationItemInputAudioTranscriptionCompletedEvent provides transcription.

type ConversationItemInputAudioTranscriptionFailedEvent added in v1.1.6

type ConversationItemInputAudioTranscriptionFailedEvent struct {
	ServerEvent
	ItemID       string      `json:"item_id"`
	ContentIndex int         `json:"content_index"`
	Error        ErrorDetail `json:"error"`
}

ConversationItemInputAudioTranscriptionFailedEvent indicates transcription failed.

type EmbeddingOption added in v1.1.6

type EmbeddingOption func(*EmbeddingProvider)

EmbeddingOption configures the EmbeddingProvider.

func WithEmbeddingAPIKey added in v1.1.6

func WithEmbeddingAPIKey(key string) EmbeddingOption

WithEmbeddingAPIKey sets the API key explicitly.

func WithEmbeddingBaseURL added in v1.1.6

func WithEmbeddingBaseURL(url string) EmbeddingOption

WithEmbeddingBaseURL sets a custom base URL (for Azure or proxies).

func WithEmbeddingHTTPClient added in v1.1.6

func WithEmbeddingHTTPClient(client *http.Client) EmbeddingOption

WithEmbeddingHTTPClient sets a custom HTTP client.

func WithEmbeddingModel added in v1.1.6

func WithEmbeddingModel(model string) EmbeddingOption

WithEmbeddingModel sets the embedding model.

type EmbeddingProvider added in v1.1.6

type EmbeddingProvider struct {
	*providers.BaseEmbeddingProvider
}

EmbeddingProvider implements embedding generation via OpenAI API.

func NewEmbeddingProvider added in v1.1.6

func NewEmbeddingProvider(opts ...EmbeddingOption) (*EmbeddingProvider, error)

NewEmbeddingProvider creates an OpenAI embedding provider.

func (*EmbeddingProvider) Embed added in v1.1.6

Embed generates embeddings for the given texts.

func (*EmbeddingProvider) EstimateCost added in v1.1.6

func (p *EmbeddingProvider) EstimateCost(tokens int) float64

EstimateCost estimates the cost for embedding the given number of tokens.

type ErrorDetail added in v1.1.6

type ErrorDetail struct {
	Type    string `json:"type"`
	Code    string `json:"code"`
	Message string `json:"message"`
	Param   string `json:"param,omitempty"`
	EventID string `json:"event_id,omitempty"`
}

ErrorDetail contains error information.

type ErrorEvent added in v1.1.6

type ErrorEvent struct {
	ServerEvent
	Error ErrorDetail `json:"error"`
}

ErrorEvent indicates an error occurred.

type InputAudioBufferAppendEvent added in v1.1.6

type InputAudioBufferAppendEvent struct {
	ClientEvent
	Audio string `json:"audio"` // Base64-encoded audio data
}

InputAudioBufferAppendEvent appends audio to the input buffer.

type InputAudioBufferClearEvent added in v1.1.6

type InputAudioBufferClearEvent struct {
	ClientEvent
}

InputAudioBufferClearEvent clears the audio buffer.

type InputAudioBufferClearedEvent added in v1.1.6

type InputAudioBufferClearedEvent struct {
	ServerEvent
}

InputAudioBufferClearedEvent confirms audio buffer was cleared.

type InputAudioBufferCommitEvent added in v1.1.6

type InputAudioBufferCommitEvent struct {
	ClientEvent
}

InputAudioBufferCommitEvent commits the audio buffer for processing.

type InputAudioBufferCommittedEvent added in v1.1.6

type InputAudioBufferCommittedEvent struct {
	ServerEvent
	PreviousItemID string `json:"previous_item_id"`
	ItemID         string `json:"item_id"`
}

InputAudioBufferCommittedEvent confirms audio buffer was committed.

type InputAudioBufferSpeechStartedEvent added in v1.1.6

type InputAudioBufferSpeechStartedEvent struct {
	ServerEvent
	AudioStartMs int    `json:"audio_start_ms"`
	ItemID       string `json:"item_id"`
}

InputAudioBufferSpeechStartedEvent indicates speech was detected.

type InputAudioBufferSpeechStoppedEvent added in v1.1.6

type InputAudioBufferSpeechStoppedEvent struct {
	ServerEvent
	AudioEndMs int    `json:"audio_end_ms"`
	ItemID     string `json:"item_id"`
}

InputAudioBufferSpeechStoppedEvent indicates speech ended.

type Provider added in v1.1.3

type Provider struct {
	providers.BaseProvider
	// contains filtered or unexported fields
}

Provider implements the Provider interface for OpenAI.

func NewProvider added in v1.1.3

func NewProvider(id, model, baseURL string, defaults providers.ProviderDefaults, includeRawOutput bool) *Provider

NewProvider creates a new OpenAI provider

func (*Provider) CalculateCost added in v1.1.3

func (p *Provider) CalculateCost(tokensIn, tokensOut, cachedTokens int) types.CostInfo

CalculateCost calculates detailed cost breakdown including optional cached tokens

func (*Provider) CreateStreamSession added in v1.1.6

func (p *Provider) CreateStreamSession(
	ctx context.Context,
	req *providers.StreamingInputConfig,
) (providers.StreamInputSession, error)

CreateStreamSession creates a new bidirectional streaming session with OpenAI Realtime API.

The session supports real-time audio input/output with the following features:

  - Bidirectional audio streaming (send and receive audio simultaneously)
  - Server-side voice activity detection (VAD) for automatic turn detection
  - Function/tool calling during the streaming session
  - Input and output audio transcription

Audio Format: OpenAI Realtime API uses 24kHz 16-bit PCM mono audio by default. The session automatically handles base64 encoding/decoding of audio data.

Example usage:

session, err := provider.CreateStreamSession(ctx, &providers.StreamingInputConfig{
    Config: types.StreamingMediaConfig{
        Type:       types.ContentTypeAudio,
        SampleRate: 24000,
        Encoding:   "pcm16",
        Channels:   1,
    },
    SystemInstruction: "You are a helpful assistant.",
})

func (*Provider) GetMultimodalCapabilities added in v1.1.3

func (p *Provider) GetMultimodalCapabilities() providers.MultimodalCapabilities

GetMultimodalCapabilities returns OpenAI's multimodal capabilities

func (*Provider) GetStreamingCapabilities added in v1.1.6

func (p *Provider) GetStreamingCapabilities() providers.StreamingCapabilities

GetStreamingCapabilities returns detailed information about OpenAI's streaming support.

func (*Provider) Predict added in v1.1.3

Predict sends a predict request to OpenAI

func (*Provider) PredictMultimodal added in v1.1.3

PredictMultimodal performs a predict request with multimodal content

func (*Provider) PredictMultimodalStream added in v1.1.3

func (p *Provider) PredictMultimodalStream(ctx context.Context, req providers.PredictionRequest) (<-chan providers.StreamChunk, error)

PredictMultimodalStream performs a streaming predict request with multimodal content

func (*Provider) PredictStream added in v1.1.3

func (p *Provider) PredictStream(ctx context.Context, req providers.PredictionRequest) (<-chan providers.StreamChunk, error)

PredictStream streams a predict response from OpenAI

func (*Provider) SupportsStreamInput added in v1.1.6

func (p *Provider) SupportsStreamInput() []string

SupportsStreamInput returns the media types supported for streaming input.

type RateLimit added in v1.1.6

type RateLimit struct {
	Name         string  `json:"name"`
	Limit        int     `json:"limit"`
	Remaining    int     `json:"remaining"`
	ResetSeconds float64 `json:"reset_seconds"`
}

RateLimit contains rate limit details.

type RateLimitsUpdatedEvent added in v1.1.6

type RateLimitsUpdatedEvent struct {
	ServerEvent
	RateLimits []RateLimit `json:"rate_limits"`
}

RateLimitsUpdatedEvent provides rate limit information.

type RealtimeSession added in v1.1.6

type RealtimeSession struct {
	// contains filtered or unexported fields
}

RealtimeSession implements StreamInputSession for OpenAI Realtime API.

func NewRealtimeSession added in v1.1.6

func NewRealtimeSession(ctx context.Context, apiKey string, config *RealtimeSessionConfig) (*RealtimeSession, error)

NewRealtimeSession creates a new OpenAI Realtime streaming session.

func (*RealtimeSession) CancelResponse added in v1.1.6

func (s *RealtimeSession) CancelResponse() error

CancelResponse cancels an in-progress response.

func (*RealtimeSession) ClearAudioBuffer added in v1.1.6

func (s *RealtimeSession) ClearAudioBuffer() error

ClearAudioBuffer clears the current audio buffer.

func (*RealtimeSession) Close added in v1.1.6

func (s *RealtimeSession) Close() error

Close closes the session.

func (*RealtimeSession) CommitAudioBuffer added in v1.1.6

func (s *RealtimeSession) CommitAudioBuffer() error

CommitAudioBuffer commits the current audio buffer for processing.

func (*RealtimeSession) Done added in v1.1.6

func (s *RealtimeSession) Done() <-chan struct{}

Done returns a channel that's closed when the session ends.

func (*RealtimeSession) EndInput added in v1.1.6

func (s *RealtimeSession) EndInput()

EndInput signals the end of user input. For OpenAI Realtime with server VAD, this commits the audio buffer. For manual turn control, this commits and triggers a response.

func (*RealtimeSession) Error added in v1.1.6

func (s *RealtimeSession) Error() error

Error returns any error that occurred during the session.

func (*RealtimeSession) Response added in v1.1.6

func (s *RealtimeSession) Response() <-chan providers.StreamChunk

Response returns the channel for receiving responses.

func (*RealtimeSession) SendChunk added in v1.1.6

func (s *RealtimeSession) SendChunk(ctx context.Context, chunk *types.MediaChunk) error

SendChunk sends an audio chunk to the server.

func (*RealtimeSession) SendSystemContext added in v1.1.6

func (s *RealtimeSession) SendSystemContext(ctx context.Context, text string) error

SendSystemContext sends a text message as context without completing the turn.

func (*RealtimeSession) SendText added in v1.1.6

func (s *RealtimeSession) SendText(ctx context.Context, text string) error

SendText sends a text message and triggers a response.

func (*RealtimeSession) SendToolResponse added in v1.1.6

func (s *RealtimeSession) SendToolResponse(ctx context.Context, toolCallID, result string) error

SendToolResponse sends the result of a tool execution back to the model.

func (*RealtimeSession) SendToolResponses added in v1.1.6

func (s *RealtimeSession) SendToolResponses(ctx context.Context, responses []providers.ToolResponse) error

SendToolResponses sends multiple tool results at once (for parallel tool calls).

func (*RealtimeSession) TriggerResponse added in v1.1.6

func (s *RealtimeSession) TriggerResponse(config *ResponseConfig) error

TriggerResponse manually triggers a response from the model.

type RealtimeSessionConfig added in v1.1.6

// RealtimeSessionConfig configures a new OpenAI Realtime streaming session.
// It is the configuration argument of NewRealtimeSession, and
// DefaultRealtimeSessionConfig returns a pre-populated value.
//
// The fields carry no JSON tags.
// NOTE(review): presumably the values are copied onto SessionConfig before
// being sent to the server — confirm against the session setup code.
type RealtimeSessionConfig struct {
	// Model specifies the model to use (e.g., "gpt-4o-realtime-preview").
	Model string

	// Modalities specifies the input/output modalities.
	// Valid values: "text", "audio"
	// Default: ["text", "audio"]
	Modalities []string

	// Instructions is the system prompt for the session.
	Instructions string

	// Voice selects the voice for audio output.
	// Options: "alloy", "echo", "fable", "onyx", "nova", "shimmer"
	// Default: "alloy"
	Voice string

	// InputAudioFormat specifies the format for input audio.
	// Options: "pcm16", "g711_ulaw", "g711_alaw"
	// Default: "pcm16"
	InputAudioFormat string

	// OutputAudioFormat specifies the format for output audio.
	// Options: "pcm16", "g711_ulaw", "g711_alaw"
	// Default: "pcm16"
	OutputAudioFormat string

	// InputAudioTranscription configures transcription of input audio.
	// If nil, input transcription is disabled.
	InputAudioTranscription *TranscriptionConfig

	// TurnDetection configures server-side voice activity detection.
	// If nil, VAD is disabled and turn management is manual.
	TurnDetection *TurnDetectionConfig

	// Tools defines available functions for the session.
	Tools []RealtimeToolDefinition

	// Temperature controls randomness (0.6-1.2, default 0.8).
	// NOTE(review): the Go zero value (0) lies outside the documented range;
	// presumably it is treated as "unset" — confirm how it is handled.
	Temperature float64

	// MaxResponseOutputTokens limits response length.
	// Use "inf" for unlimited, or a specific number.
	// The field is an interface{} so it can hold either form.
	MaxResponseOutputTokens interface{}
}

RealtimeSessionConfig configures a new OpenAI Realtime streaming session.

func DefaultRealtimeSessionConfig added in v1.1.6

func DefaultRealtimeSessionConfig() RealtimeSessionConfig

DefaultRealtimeSessionConfig returns sensible defaults for a Realtime session.

type RealtimeToolDef added in v1.1.6

// RealtimeToolDef is the tool definition format for session config.
// It declares the same fields and JSON tags as RealtimeToolDefinition and is
// the element type of the Tools slices on SessionConfig, SessionInfo, and
// ResponseConfig.
type RealtimeToolDef struct {
	// Type is serialized under "type".
	// NOTE(review): presumably always "function", as documented on
	// RealtimeToolDefinition — confirm.
	Type        string                 `json:"type"`
	// Name is serialized under "name".
	Name        string                 `json:"name"`
	// Description is omitted from the JSON when empty.
	Description string                 `json:"description,omitempty"`
	// Parameters is omitted from the JSON when the map is empty or nil.
	Parameters  map[string]interface{} `json:"parameters,omitempty"`
}

RealtimeToolDef is the tool definition format for session config.

type RealtimeToolDefinition added in v1.1.6

type RealtimeToolDefinition struct {
	// Type is always "function" for function tools.
	Type string `json:"type"`

	// Name is the function name.
	Name string `json:"name"`

	// Description explains what the function does.
	Description string `json:"description,omitempty"`

	// Parameters is the JSON Schema for function parameters.
	Parameters map[string]interface{} `json:"parameters,omitempty"`
}

RealtimeToolDefinition defines a function available in the session.

type RealtimeWebSocket added in v1.1.6

type RealtimeWebSocket struct {
	// contains filtered or unexported fields
}

RealtimeWebSocket manages WebSocket connections for OpenAI Realtime API.

func NewRealtimeWebSocket added in v1.1.6

func NewRealtimeWebSocket(model, apiKey string) *RealtimeWebSocket

NewRealtimeWebSocket creates a new WebSocket manager for OpenAI Realtime API.

func (*RealtimeWebSocket) Close added in v1.1.6

func (ws *RealtimeWebSocket) Close() error

Close closes the WebSocket connection gracefully.

func (*RealtimeWebSocket) Connect added in v1.1.6

func (ws *RealtimeWebSocket) Connect(ctx context.Context) error

Connect establishes a WebSocket connection to the OpenAI Realtime API.

func (*RealtimeWebSocket) ConnectWithRetry added in v1.1.6

func (ws *RealtimeWebSocket) ConnectWithRetry(ctx context.Context) error

ConnectWithRetry attempts to connect with exponential backoff.

func (*RealtimeWebSocket) IsClosed added in v1.1.6

func (ws *RealtimeWebSocket) IsClosed() bool

IsClosed returns whether the WebSocket is closed.

func (*RealtimeWebSocket) Receive added in v1.1.6

func (ws *RealtimeWebSocket) Receive(ctx context.Context) ([]byte, error)

Receive reads a message from the WebSocket with context support.

func (*RealtimeWebSocket) ReceiveLoop added in v1.1.6

func (ws *RealtimeWebSocket) ReceiveLoop(ctx context.Context, msgCh chan<- []byte) error

ReceiveLoop continuously reads messages and sends them to the provided channel. It returns when the connection is closed or an error occurs.

func (*RealtimeWebSocket) Send added in v1.1.6

func (ws *RealtimeWebSocket) Send(msg interface{}) error

Send sends a message to the WebSocket.

func (*RealtimeWebSocket) StartHeartbeat added in v1.1.6

func (ws *RealtimeWebSocket) StartHeartbeat(ctx context.Context, interval time.Duration)

StartHeartbeat starts a goroutine that sends ping messages periodically.

type ResponseAudioDeltaEvent added in v1.1.6

type ResponseAudioDeltaEvent struct {
	ServerEvent
	ResponseID   string `json:"response_id"`
	ItemID       string `json:"item_id"`
	OutputIndex  int    `json:"output_index"`
	ContentIndex int    `json:"content_index"`
	Delta        string `json:"delta"` // Base64-encoded audio
}

ResponseAudioDeltaEvent provides streaming audio.

type ResponseAudioDoneEvent added in v1.1.6

type ResponseAudioDoneEvent struct {
	ServerEvent
	ResponseID   string `json:"response_id"`
	ItemID       string `json:"item_id"`
	OutputIndex  int    `json:"output_index"`
	ContentIndex int    `json:"content_index"`
}

ResponseAudioDoneEvent indicates audio streaming completed.

type ResponseAudioTranscriptDeltaEvent added in v1.1.6

type ResponseAudioTranscriptDeltaEvent struct {
	ServerEvent
	ResponseID   string `json:"response_id"`
	ItemID       string `json:"item_id"`
	OutputIndex  int    `json:"output_index"`
	ContentIndex int    `json:"content_index"`
	Delta        string `json:"delta"`
}

ResponseAudioTranscriptDeltaEvent provides streaming transcript.

type ResponseAudioTranscriptDoneEvent added in v1.1.6

type ResponseAudioTranscriptDoneEvent struct {
	ServerEvent
	ResponseID   string `json:"response_id"`
	ItemID       string `json:"item_id"`
	OutputIndex  int    `json:"output_index"`
	ContentIndex int    `json:"content_index"`
	Transcript   string `json:"transcript"`
}

ResponseAudioTranscriptDoneEvent indicates transcript completed.

type ResponseCancelEvent added in v1.1.6

type ResponseCancelEvent struct {
	ClientEvent
}

ResponseCancelEvent cancels an in-progress response.

type ResponseConfig added in v1.1.6

// ResponseConfig configures a response.
// It is the optional payload of ResponseCreateEvent and the argument of
// RealtimeSession.TriggerResponse.
//
// Every field is tagged omitempty, so zero values (empty strings, nil or
// empty slices, nil interfaces, and a Temperature of 0) are left out of the
// serialized JSON.
type ResponseConfig struct {
	Modalities        []string          `json:"modalities,omitempty"`
	Instructions      string            `json:"instructions,omitempty"`
	Voice             string            `json:"voice,omitempty"`
	OutputAudioFormat string            `json:"output_audio_format,omitempty"`
	Tools             []RealtimeToolDef `json:"tools,omitempty"`
	ToolChoice        interface{}       `json:"tool_choice,omitempty"`
	Temperature       float64           `json:"temperature,omitempty"`
	MaxOutputTokens   interface{}       `json:"max_output_tokens,omitempty"`
}

ResponseConfig configures a response.

type ResponseContentPartAddedEvent added in v1.1.6

type ResponseContentPartAddedEvent struct {
	ServerEvent
	ResponseID   string              `json:"response_id"`
	ItemID       string              `json:"item_id"`
	OutputIndex  int                 `json:"output_index"`
	ContentIndex int                 `json:"content_index"`
	Part         ConversationContent `json:"part"`
}

ResponseContentPartAddedEvent indicates content was added.

type ResponseContentPartDoneEvent added in v1.1.6

type ResponseContentPartDoneEvent struct {
	ServerEvent
	ResponseID   string              `json:"response_id"`
	ItemID       string              `json:"item_id"`
	OutputIndex  int                 `json:"output_index"`
	ContentIndex int                 `json:"content_index"`
	Part         ConversationContent `json:"part"`
}

ResponseContentPartDoneEvent indicates content part completed.

type ResponseCreateEvent added in v1.1.6

type ResponseCreateEvent struct {
	ClientEvent
	Response *ResponseConfig `json:"response,omitempty"`
}

ResponseCreateEvent triggers a response from the model.

type ResponseCreatedEvent added in v1.1.6

type ResponseCreatedEvent struct {
	ServerEvent
	Response ResponseInfo `json:"response"`
}

ResponseCreatedEvent indicates a response is starting.

type ResponseDoneEvent added in v1.1.6

type ResponseDoneEvent struct {
	ServerEvent
	Response ResponseInfo `json:"response"`
}

ResponseDoneEvent indicates a response completed.

type ResponseFunctionCallArgumentsDeltaEvent added in v1.1.6

type ResponseFunctionCallArgumentsDeltaEvent struct {
	ServerEvent
	ResponseID  string `json:"response_id"`
	ItemID      string `json:"item_id"`
	OutputIndex int    `json:"output_index"`
	CallID      string `json:"call_id"`
	Delta       string `json:"delta"`
}

ResponseFunctionCallArgumentsDeltaEvent provides streaming function args.

type ResponseFunctionCallArgumentsDoneEvent added in v1.1.6

type ResponseFunctionCallArgumentsDoneEvent struct {
	ServerEvent
	ResponseID  string `json:"response_id"`
	ItemID      string `json:"item_id"`
	OutputIndex int    `json:"output_index"`
	CallID      string `json:"call_id"`
	Name        string `json:"name"`
	Arguments   string `json:"arguments"`
}

ResponseFunctionCallArgumentsDoneEvent indicates function args completed.

type ResponseInfo added in v1.1.6

type ResponseInfo struct {
	ID            string             `json:"id"`
	Object        string             `json:"object"`
	Status        string             `json:"status"`
	StatusDetails interface{}        `json:"status_details"`
	Output        []ConversationItem `json:"output"`
	Usage         *UsageInfo         `json:"usage"`
}

ResponseInfo contains response details.

type ResponseOutputItemAddedEvent added in v1.1.6

type ResponseOutputItemAddedEvent struct {
	ServerEvent
	ResponseID  string           `json:"response_id"`
	OutputIndex int              `json:"output_index"`
	Item        ConversationItem `json:"item"`
}

ResponseOutputItemAddedEvent indicates an output item was added.

type ResponseOutputItemDoneEvent added in v1.1.6

type ResponseOutputItemDoneEvent struct {
	ServerEvent
	ResponseID  string           `json:"response_id"`
	OutputIndex int              `json:"output_index"`
	Item        ConversationItem `json:"item"`
}

ResponseOutputItemDoneEvent indicates an output item completed.

type ResponseTextDeltaEvent added in v1.1.6

type ResponseTextDeltaEvent struct {
	ServerEvent
	ResponseID   string `json:"response_id"`
	ItemID       string `json:"item_id"`
	OutputIndex  int    `json:"output_index"`
	ContentIndex int    `json:"content_index"`
	Delta        string `json:"delta"`
}

ResponseTextDeltaEvent provides streaming text.

type ResponseTextDoneEvent added in v1.1.6

type ResponseTextDoneEvent struct {
	ServerEvent
	ResponseID   string `json:"response_id"`
	ItemID       string `json:"item_id"`
	OutputIndex  int    `json:"output_index"`
	ContentIndex int    `json:"content_index"`
	Text         string `json:"text"`
}

ResponseTextDoneEvent indicates text streaming completed.

type ServerEvent added in v1.1.6

// ServerEvent is the base structure for all server events.
// It is embedded by the concrete server event types in this package
// (for example ErrorEvent and SessionCreatedEvent).
type ServerEvent struct {
	// EventID is serialized under "event_id".
	EventID string `json:"event_id"`
	// Type is serialized under "type".
	Type    string `json:"type"`
}

ServerEvent is the base structure for all server events.

type SessionConfig added in v1.1.6

// SessionConfig is the session configuration sent in session.update.
//
// TurnDetection is a pointer without omitempty so that a nil value is
// serialized as an explicit null, which disables VAD. Omitting the key
// causes OpenAI to use its default (server_vad).
//
// All other fields are tagged omitempty, so their zero values are left out
// of the JSON. In particular a Temperature of 0 is never sent.
type SessionConfig struct {
	Modalities              []string             `json:"modalities,omitempty"`
	Instructions            string               `json:"instructions,omitempty"`
	Voice                   string               `json:"voice,omitempty"`
	InputAudioFormat        string               `json:"input_audio_format,omitempty"`
	OutputAudioFormat       string               `json:"output_audio_format,omitempty"`
	InputAudioTranscription *TranscriptionConfig `json:"input_audio_transcription,omitempty"`
	TurnDetection           *TurnDetectionConfig `json:"turn_detection"` // No omitempty - null disables VAD
	Tools                   []RealtimeToolDef    `json:"tools,omitempty"`
	ToolChoice              interface{}          `json:"tool_choice,omitempty"`
	Temperature             float64              `json:"temperature,omitempty"`
	MaxResponseOutputTokens interface{}          `json:"max_response_output_tokens,omitempty"`
}

SessionConfig is the session configuration sent in session.update. Note: TurnDetection uses a pointer without omitempty so we can explicitly send null to disable VAD. Omitting it causes OpenAI to use default (server_vad).

type SessionCreatedEvent added in v1.1.6

type SessionCreatedEvent struct {
	ServerEvent
	Session SessionInfo `json:"session"`
}

SessionCreatedEvent is sent when the session is established.

type SessionInfo added in v1.1.6

type SessionInfo struct {
	ID                      string               `json:"id"`
	Object                  string               `json:"object"`
	Model                   string               `json:"model"`
	Modalities              []string             `json:"modalities"`
	Instructions            string               `json:"instructions"`
	Voice                   string               `json:"voice"`
	InputAudioFormat        string               `json:"input_audio_format"`
	OutputAudioFormat       string               `json:"output_audio_format"`
	InputAudioTranscription *TranscriptionConfig `json:"input_audio_transcription"`
	TurnDetection           *TurnDetectionConfig `json:"turn_detection"`
	Tools                   []RealtimeToolDef    `json:"tools"`
	Temperature             float64              `json:"temperature"`
	MaxResponseOutputTokens interface{}          `json:"max_response_output_tokens"`
}

SessionInfo contains session details.

type SessionUpdateEvent added in v1.1.6

type SessionUpdateEvent struct {
	ClientEvent
	Session SessionConfig `json:"session"`
}

SessionUpdateEvent updates session configuration.

type SessionUpdatedEvent added in v1.1.6

type SessionUpdatedEvent struct {
	ServerEvent
	Session SessionInfo `json:"session"`
}

SessionUpdatedEvent confirms a session update.

type ToolProvider added in v1.1.3

type ToolProvider struct {
	*Provider
}

ToolProvider extends Provider with tool support.

func NewToolProvider added in v1.1.3

func NewToolProvider(id, model, baseURL string, defaults providers.ProviderDefaults, includeRawOutput bool, additionalConfig map[string]interface{}) *ToolProvider

NewToolProvider creates a new OpenAI provider with tool support

func (*ToolProvider) BuildTooling added in v1.1.3

func (p *ToolProvider) BuildTooling(descriptors []*providers.ToolDescriptor) (interface{}, error)

BuildTooling converts tool descriptors to OpenAI format

func (*ToolProvider) PredictMultimodalWithTools added in v1.1.3

func (p *ToolProvider) PredictMultimodalWithTools(ctx context.Context, req providers.PredictionRequest, tools interface{}, toolChoice string) (providers.PredictionResponse, []types.MessageToolCall, error)

PredictMultimodalWithTools implements the providers.MultimodalToolSupport interface for ToolProvider. This allows combining multimodal content (images) with tool calls in a single request.

func (*ToolProvider) PredictStreamWithTools added in v1.1.5

func (p *ToolProvider) PredictStreamWithTools(
	ctx context.Context,
	req providers.PredictionRequest,
	tools interface{},
	toolChoice string,
) (<-chan providers.StreamChunk, error)

PredictStreamWithTools performs a streaming predict request with tool support

func (*ToolProvider) PredictWithTools added in v1.1.3

func (p *ToolProvider) PredictWithTools(ctx context.Context, req providers.PredictionRequest, tools interface{}, toolChoice string) (providers.PredictionResponse, []types.MessageToolCall, error)

PredictWithTools performs a prediction request with tool support

type TranscriptionConfig added in v1.1.6

type TranscriptionConfig struct {
	// Model specifies the transcription model.
	// Default: "whisper-1"
	Model string `json:"model,omitempty"`
}

TranscriptionConfig configures audio transcription.

type TurnDetectionConfig added in v1.1.6

// TurnDetectionConfig configures server-side VAD.
//
// All fields except Type are tagged omitempty, so their Go zero values are
// left out of the serialized JSON rather than being sent.
type TurnDetectionConfig struct {
	// Type specifies the VAD type.
	// Options: "server_vad", "semantic_vad"
	Type string `json:"type"`

	// Threshold is the activation threshold (0.0-1.0).
	// Default: 0.5
	// Because of omitempty, a threshold of exactly 0 is omitted from the JSON.
	Threshold float64 `json:"threshold,omitempty"`

	// PrefixPaddingMs is audio padding before speech in milliseconds.
	// Default: 300
	PrefixPaddingMs int `json:"prefix_padding_ms,omitempty"`

	// SilenceDurationMs is silence duration to detect end of speech.
	// Default: 500
	SilenceDurationMs int `json:"silence_duration_ms,omitempty"`

	// CreateResponse determines if a response is automatically created
	// when speech ends. Default: true
	// NOTE(review): because of omitempty, false is omitted from the JSON.
	// Given the documented default of true, setting this field to false
	// presumably cannot turn automatic responses off — confirm, and consider
	// a *bool in a future breaking release.
	CreateResponse bool `json:"create_response,omitempty"`
}

TurnDetectionConfig configures server-side VAD.

type UsageInfo added in v1.1.6

// UsageInfo contains token usage information.
// ResponseInfo refers to it through its Usage pointer field.
type UsageInfo struct {
	TotalTokens       int `json:"total_tokens"`
	InputTokens       int `json:"input_tokens"`
	OutputTokens      int `json:"output_tokens"`
	// InputTokenDetails is an anonymous nested struct serialized under
	// "input_token_details", with cached, text, and audio token counts.
	InputTokenDetails struct {
		CachedTokens int `json:"cached_tokens"`
		TextTokens   int `json:"text_tokens"`
		AudioTokens  int `json:"audio_tokens"`
	} `json:"input_token_details"`
	// OutputTokenDetails is an anonymous nested struct serialized under
	// "output_token_details", with text and audio token counts.
	OutputTokenDetails struct {
		TextTokens  int `json:"text_tokens"`
		AudioTokens int `json:"audio_tokens"`
	} `json:"output_token_details"`
}

UsageInfo contains token usage information.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL