Documentation
¶
Index ¶
- Constants
- Variables
- func ClassifyError(apiErr *APIError) error
- type APIError
- type AudioEncoder
- func (e *AudioEncoder) AssembleChunks(chunks []*types.MediaChunk) ([]byte, error)
- func (e *AudioEncoder) ConvertInt16ToPCM(samples []int16) []byte
- func (e *AudioEncoder) ConvertPCMToInt16(pcmData []byte) ([]int16, error)
- func (e *AudioEncoder) CreateChunks(ctx context.Context, pcmData []byte) ([]*types.MediaChunk, error)
- func (e *AudioEncoder) DecodePCM(base64Data string) ([]byte, error)
- func (e *AudioEncoder) EncodePCM(pcmData []byte) (string, error)
- func (e *AudioEncoder) GenerateSineWave(frequency float64, durationMs int, amplitude float64) []byte
- func (e *AudioEncoder) GetChunkDurationMs(chunkSize int) float64
- func (e *AudioEncoder) GetChunkSize() int
- func (e *AudioEncoder) GetSampleRate() int
- func (e *AudioEncoder) ReadChunks(ctx context.Context, reader io.Reader) (chunkStream <-chan *types.MediaChunk, errStream <-chan error)
- func (e *AudioEncoder) ValidateConfig(config *types.StreamingMediaConfig) error
- type ErrorResponse
- type InlineData
- type ModelTurn
- type Part
- type PromptFeedback
- type Provider
- func (p *Provider) CalculateCost(tokensIn, tokensOut, cachedTokens int) types.CostInfo
- func (p *Provider) CreateStreamSession(ctx context.Context, req *providers.StreamInputRequest) (providers.StreamInputSession, error)
- func (p *Provider) GetMultimodalCapabilities() providers.MultimodalCapabilities
- func (p *Provider) GetStreamingCapabilities() providers.StreamingCapabilities
- func (p *Provider) Predict(ctx context.Context, req providers.PredictionRequest) (providers.PredictionResponse, error)
- func (p *Provider) PredictMultimodal(ctx context.Context, req providers.PredictionRequest) (providers.PredictionResponse, error)
- func (p *Provider) PredictMultimodalStream(ctx context.Context, req providers.PredictionRequest) (<-chan providers.StreamChunk, error)
- func (p *Provider) PredictStream(ctx context.Context, req providers.PredictionRequest) (<-chan providers.StreamChunk, error)
- func (p *Provider) SupportsStreamInput() []string
- type RecoveryStrategy
- type SafetyRating
- type ServerContent
- type ServerMessage
- type SetupComplete
- type StreamSession
- func (s *StreamSession) Close() error
- func (s *StreamSession) CompleteTurn(ctx context.Context) error
- func (s *StreamSession) Done() <-chan struct{}
- func (s *StreamSession) Error() error
- func (s *StreamSession) Response() <-chan providers.StreamChunk
- func (s *StreamSession) SendChunk(ctx context.Context, chunk *types.MediaChunk) error
- func (s *StreamSession) SendText(ctx context.Context, text string) error
- type StreamSessionConfig
- type ToolProvider
- type WebSocketManager
- func (wm *WebSocketManager) Close() error
- func (wm *WebSocketManager) Connect(ctx context.Context) error
- func (wm *WebSocketManager) ConnectWithRetry(ctx context.Context) error
- func (wm *WebSocketManager) IsConnected() bool
- func (wm *WebSocketManager) Receive(ctx context.Context, v interface{}) error
- func (wm *WebSocketManager) Send(msg interface{}) error
- func (wm *WebSocketManager) SendPing() error
- func (wm *WebSocketManager) StartHeartbeat(ctx context.Context, interval time.Duration)
Constants ¶
const ( // DefaultChunkDuration is 100ms of audio DefaultChunkDuration = 100 // milliseconds // DefaultChunkSize is the number of bytes for 100ms at 16kHz 16-bit mono // 16000 Hz * 0.1 sec * 2 bytes/sample = 3200 bytes DefaultChunkSize = (geminiSampleRate * DefaultChunkDuration / 1000) * bytesPerSample )
const ( ErrNotConnected = "not connected" ErrManagerClosed = "manager is closed" )
Common error messages
const (
ErrSessionClosed = "session is closed"
)
Common error messages
Variables ¶
var ( // ErrInvalidSampleRate indicates an unsupported sample rate ErrInvalidSampleRate = errors.New("invalid sample rate: must be 16000 Hz") // ErrInvalidChannels indicates an unsupported channel count ErrInvalidChannels = errors.New("invalid channels: must be mono (1 channel)") // ErrInvalidBitDepth indicates an unsupported bit depth ErrInvalidBitDepth = errors.New("invalid bit depth: must be 16 bits") // ErrInvalidChunkSize indicates chunk size is not aligned ErrInvalidChunkSize = errors.New("invalid chunk size: must be multiple of sample size") // ErrEmptyAudioData indicates no audio data provided ErrEmptyAudioData = errors.New("empty audio data") )
var ( // ErrInvalidAudioFormat indicates audio format doesn't meet Gemini requirements ErrInvalidAudioFormat = errors.New("invalid audio format") // ErrRateLimitExceeded indicates too many requests ErrRateLimitExceeded = errors.New("rate limit exceeded") // ErrAuthenticationFailed indicates invalid API key ErrAuthenticationFailed = errors.New("authentication failed") ErrServiceUnavailable = errors.New("service unavailable") // ErrPolicyViolation indicates content policy violation ErrPolicyViolation = errors.New("policy violation") // ErrInvalidRequest indicates malformed request ErrInvalidRequest = errors.New("invalid request") )
Common errors for Gemini streaming
Functions ¶
func ClassifyError ¶
ClassifyError converts an API error code to a standard error
Types ¶
type APIError ¶ added in v1.1.3
type APIError struct {
Code int `json:"code"`
Message string `json:"message"`
Status string `json:"status"`
}
APIError represents an error returned by the Gemini API
func (*APIError) IsAuthError ¶ added in v1.1.3
IsAuthError returns true if the error is authentication-related
func (*APIError) IsPolicyViolation ¶ added in v1.1.3
IsPolicyViolation returns true if the error is a content policy violation
func (*APIError) IsRetryable ¶ added in v1.1.3
IsRetryable returns true if the error can be retried
type AudioEncoder ¶
type AudioEncoder struct {
// contains filtered or unexported fields
}
AudioEncoder handles PCM Linear16 audio encoding for Gemini Live API
func NewAudioEncoder ¶
func NewAudioEncoder() *AudioEncoder
NewAudioEncoder creates a new audio encoder with Gemini Live API specifications
func NewAudioEncoderWithChunkSize ¶
func NewAudioEncoderWithChunkSize(chunkSize int) (*AudioEncoder, error)
NewAudioEncoderWithChunkSize creates an encoder with custom chunk size
func (*AudioEncoder) AssembleChunks ¶
func (e *AudioEncoder) AssembleChunks(chunks []*types.MediaChunk) ([]byte, error)
AssembleChunks reassembles MediaChunks back into continuous PCM data.
func (*AudioEncoder) ConvertInt16ToPCM ¶
func (e *AudioEncoder) ConvertInt16ToPCM(samples []int16) []byte
ConvertInt16ToPCM converts []int16 samples to PCM bytes (little-endian)
func (*AudioEncoder) ConvertPCMToInt16 ¶
func (e *AudioEncoder) ConvertPCMToInt16(pcmData []byte) ([]int16, error)
ConvertPCMToInt16 converts PCM bytes to []int16 samples (little-endian)
func (*AudioEncoder) CreateChunks ¶
func (e *AudioEncoder) CreateChunks(ctx context.Context, pcmData []byte) ([]*types.MediaChunk, error)
CreateChunks splits PCM audio data into appropriately sized chunks
func (*AudioEncoder) DecodePCM ¶
func (e *AudioEncoder) DecodePCM(base64Data string) ([]byte, error)
DecodePCM decodes base64-encoded audio data back to raw PCM
func (*AudioEncoder) EncodePCM ¶
func (e *AudioEncoder) EncodePCM(pcmData []byte) (string, error)
EncodePCM encodes raw PCM audio data to base64 for WebSocket transmission
func (*AudioEncoder) GenerateSineWave ¶
func (e *AudioEncoder) GenerateSineWave(frequency float64, durationMs int, amplitude float64) []byte
GenerateSineWave generates PCM audio for a sine wave (useful for testing)
func (*AudioEncoder) GetChunkDurationMs ¶
func (e *AudioEncoder) GetChunkDurationMs(chunkSize int) float64
GetChunkDurationMs calculates the duration of a chunk in milliseconds
func (*AudioEncoder) GetChunkSize ¶
func (e *AudioEncoder) GetChunkSize() int
GetChunkSize returns the configured chunk size in bytes
func (*AudioEncoder) GetSampleRate ¶
func (e *AudioEncoder) GetSampleRate() int
GetSampleRate returns the configured sample rate
func (*AudioEncoder) ReadChunks ¶
func (e *AudioEncoder) ReadChunks(ctx context.Context, reader io.Reader) (chunkStream <-chan *types.MediaChunk, errStream <-chan error)
ReadChunks reads audio from an io.Reader and creates chunks on-the-fly
func (*AudioEncoder) ValidateConfig ¶
func (e *AudioEncoder) ValidateConfig(config *types.StreamingMediaConfig) error
ValidateConfig validates audio configuration against Gemini requirements
type ErrorResponse ¶
type ErrorResponse struct {
Error *APIError `json:"error"`
}
ErrorResponse wraps an APIError in a message format
type InlineData ¶
type InlineData struct {
MimeType string `json:"mimeType,omitempty"` // camelCase!
Data string `json:"data,omitempty"` // Base64 encoded
}
InlineData represents inline media data
type ModelTurn ¶
type ModelTurn struct {
Parts []Part `json:"parts,omitempty"`
}
ModelTurn represents a model response turn
type Part ¶
type Part struct {
Text string `json:"text,omitempty"`
InlineData *InlineData `json:"inlineData,omitempty"` // camelCase!
}
Part represents a content part (text or inline data)
type PromptFeedback ¶
type PromptFeedback struct {
SafetyRatings []SafetyRating `json:"safetyRatings,omitempty"`
BlockReason string `json:"blockReason,omitempty"`
}
PromptFeedback contains safety ratings and block reason
func (*PromptFeedback) GetBlockReason ¶
func (f *PromptFeedback) GetBlockReason() string
GetBlockReason returns a human-readable block reason
func (*PromptFeedback) IsBlocked ¶
func (f *PromptFeedback) IsBlocked() bool
IsBlocked returns true if content was blocked by safety filters
type Provider ¶ added in v1.1.3
type Provider struct {
providers.BaseProvider
Model string
BaseURL string
ApiKey string
Defaults providers.ProviderDefaults
}
Provider implements the Provider interface for Google Gemini
func NewProvider ¶ added in v1.1.3
func NewProvider(id, model, baseURL string, defaults providers.ProviderDefaults, includeRawOutput bool) *Provider
NewProvider creates a new Gemini provider
func (*Provider) CalculateCost ¶ added in v1.1.3
CalculateCost calculates detailed cost breakdown including optional cached tokens
func (*Provider) CreateStreamSession ¶ added in v1.1.3
func (p *Provider) CreateStreamSession(ctx context.Context, req *providers.StreamInputRequest) (providers.StreamInputSession, error)
CreateStreamSession creates a new bidirectional streaming session with Gemini Live API
Response Modalities: By default, the session is configured to return TEXT responses only. To request audio responses, pass "response_modalities" in the request metadata:
req := providers.StreamInputRequest{
Config: config,
Metadata: map[string]interface{}{
"response_modalities": []string{"AUDIO"}, // Audio only
// OR
"response_modalities": []string{"TEXT", "AUDIO"}, // Both text and audio
},
}
Audio responses will be delivered in the StreamChunk.Metadata["audio_data"] field as base64-encoded PCM.
func (*Provider) GetMultimodalCapabilities ¶ added in v1.1.3
func (p *Provider) GetMultimodalCapabilities() providers.MultimodalCapabilities
GetMultimodalCapabilities returns Gemini's multimodal support capabilities
func (*Provider) GetStreamingCapabilities ¶ added in v1.1.3
func (p *Provider) GetStreamingCapabilities() providers.StreamingCapabilities
GetStreamingCapabilities returns detailed information about Gemini's streaming support
func (*Provider) Predict ¶ added in v1.1.3
func (p *Provider) Predict(ctx context.Context, req providers.PredictionRequest) (providers.PredictionResponse, error)
Predict sends a predict request to Gemini
func (*Provider) PredictMultimodal ¶ added in v1.1.3
func (p *Provider) PredictMultimodal(ctx context.Context, req providers.PredictionRequest) (providers.PredictionResponse, error)
PredictMultimodal performs a predict request with multimodal content
func (*Provider) PredictMultimodalStream ¶ added in v1.1.3
func (p *Provider) PredictMultimodalStream(ctx context.Context, req providers.PredictionRequest) (<-chan providers.StreamChunk, error)
PredictMultimodalStream performs a streaming predict request with multimodal content
func (*Provider) PredictStream ¶ added in v1.1.3
func (p *Provider) PredictStream(ctx context.Context, req providers.PredictionRequest) (<-chan providers.StreamChunk, error)
PredictStream streams a predict response from Gemini
func (*Provider) SupportsStreamInput ¶ added in v1.1.3
SupportsStreamInput returns the media types supported for streaming input
type RecoveryStrategy ¶
type RecoveryStrategy int
RecoveryStrategy defines how to handle different error types
const ( // RecoveryRetry indicates the operation should be retried RecoveryRetry RecoveryStrategy = iota // RecoveryFailFast indicates the operation should fail immediately RecoveryFailFast // RecoveryGracefulDegradation indicates fallback to a simpler mode RecoveryGracefulDegradation // RecoveryWaitAndRetry indicates retry after a delay RecoveryWaitAndRetry )
func DetermineRecoveryStrategy ¶
func DetermineRecoveryStrategy(err error) RecoveryStrategy
DetermineRecoveryStrategy determines how to handle an error
type SafetyRating ¶
type SafetyRating struct {
Category string `json:"category"`
Probability string `json:"probability"`
}
SafetyRating represents content safety assessment
type ServerContent ¶
type ServerContent struct {
ModelTurn *ModelTurn `json:"modelTurn,omitempty"`
TurnComplete bool `json:"turnComplete,omitempty"`
Interrupted bool `json:"interrupted,omitempty"`
}
ServerContent represents the server content
type ServerMessage ¶
type ServerMessage struct {
SetupComplete *SetupComplete `json:"setupComplete,omitempty"`
ServerContent *ServerContent `json:"serverContent,omitempty"`
}
ServerMessage represents a message from the Gemini server
func (*ServerMessage) UnmarshalJSON ¶
func (s *ServerMessage) UnmarshalJSON(data []byte) error
UnmarshalJSON unmarshals ServerMessage from JSON with custom handling.
type SetupComplete ¶
type SetupComplete struct{}
SetupComplete indicates setup is complete (empty object per docs)
type StreamSession ¶ added in v1.1.3
type StreamSession struct {
// contains filtered or unexported fields
}
StreamSession implements StreamInputSession for Gemini Live API
func NewStreamSession ¶ added in v1.1.3
func NewStreamSession(ctx context.Context, wsURL, apiKey string, config StreamSessionConfig) (*StreamSession, error)
NewStreamSession creates a new streaming session
func (*StreamSession) Close ¶ added in v1.1.3
func (s *StreamSession) Close() error
Close closes the session
func (*StreamSession) CompleteTurn ¶ added in v1.1.3
func (s *StreamSession) CompleteTurn(ctx context.Context) error
CompleteTurn signals that the current turn is complete
func (*StreamSession) Done ¶ added in v1.1.3
func (s *StreamSession) Done() <-chan struct{}
Done returns a channel that's closed when the session ends
func (*StreamSession) Error ¶ added in v1.1.3
func (s *StreamSession) Error() error
Error returns the error that caused the session to close
func (*StreamSession) Response ¶ added in v1.1.3
func (s *StreamSession) Response() <-chan providers.StreamChunk
Response returns the channel for receiving responses
func (*StreamSession) SendChunk ¶ added in v1.1.3
func (s *StreamSession) SendChunk(ctx context.Context, chunk *types.MediaChunk) error
SendChunk sends a media chunk to the server
type StreamSessionConfig ¶
type StreamSessionConfig struct {
Model string // Model name (will be prefixed with "models/" automatically)
ResponseModalities []string // "TEXT" and/or "AUDIO"
}
StreamSessionConfig configures a streaming session
type ToolProvider ¶ added in v1.1.3
type ToolProvider struct {
*Provider
// contains filtered or unexported fields
}
ToolProvider extends Provider with tool support
func NewToolProvider ¶ added in v1.1.3
func NewToolProvider(id, model, baseURL string, defaults providers.ProviderDefaults, includeRawOutput bool) *ToolProvider
NewToolProvider creates a new Gemini provider with tool support
func (*ToolProvider) BuildTooling ¶ added in v1.1.3
func (p *ToolProvider) BuildTooling(descriptors []*providers.ToolDescriptor) (interface{}, error)
BuildTooling converts tool descriptors to Gemini format
func (*ToolProvider) PredictWithTools ¶ added in v1.1.3
func (p *ToolProvider) PredictWithTools(ctx context.Context, req providers.PredictionRequest, tools interface{}, toolChoice string) (providers.PredictionResponse, []types.MessageToolCall, error)
PredictWithTools performs a predict request with tool support
type WebSocketManager ¶
type WebSocketManager struct {
// contains filtered or unexported fields
}
WebSocketManager manages a WebSocket connection with reconnection logic.
func NewWebSocketManager ¶
func NewWebSocketManager(url, apiKey string) *WebSocketManager
NewWebSocketManager creates a new WebSocket manager
func (*WebSocketManager) Close ¶
func (wm *WebSocketManager) Close() error
Close gracefully closes the WebSocket connection
func (*WebSocketManager) Connect ¶
func (wm *WebSocketManager) Connect(ctx context.Context) error
Connect establishes a WebSocket connection to the Gemini Live API
func (*WebSocketManager) ConnectWithRetry ¶
func (wm *WebSocketManager) ConnectWithRetry(ctx context.Context) error
ConnectWithRetry connects with exponential backoff retry logic
func (*WebSocketManager) IsConnected ¶
func (wm *WebSocketManager) IsConnected() bool
IsConnected returns true if the WebSocket is connected
func (*WebSocketManager) Receive ¶
func (wm *WebSocketManager) Receive(ctx context.Context, v interface{}) error
Receive reads a message from the WebSocket
func (*WebSocketManager) Send ¶
func (wm *WebSocketManager) Send(msg interface{}) error
Send sends a message through the WebSocket
func (*WebSocketManager) SendPing ¶
func (wm *WebSocketManager) SendPing() error
SendPing sends a WebSocket ping to keep the connection alive
func (*WebSocketManager) StartHeartbeat ¶
func (wm *WebSocketManager) StartHeartbeat(ctx context.Context, interval time.Duration)
StartHeartbeat starts a goroutine that sends periodic pings