Documentation
¶
Index ¶
- Constants
- Variables
- func ClassifyError(apiErr *GeminiAPIError) error
- type AudioEncoder
- func (e *AudioEncoder) AssembleChunks(chunks []*types.MediaChunk) ([]byte, error)
- func (e *AudioEncoder) ConvertInt16ToPCM(samples []int16) []byte
- func (e *AudioEncoder) ConvertPCMToInt16(pcmData []byte) ([]int16, error)
- func (e *AudioEncoder) CreateChunks(ctx context.Context, pcmData []byte) ([]*types.MediaChunk, error)
- func (e *AudioEncoder) DecodePCM(base64Data string) ([]byte, error)
- func (e *AudioEncoder) EncodePCM(pcmData []byte) (string, error)
- func (e *AudioEncoder) GenerateSineWave(frequency float64, durationMs int, amplitude float64) []byte
- func (e *AudioEncoder) GetChunkDurationMs(chunkSize int) float64
- func (e *AudioEncoder) GetChunkSize() int
- func (e *AudioEncoder) GetSampleRate() int
- func (e *AudioEncoder) ReadChunks(ctx context.Context, reader io.Reader) (<-chan *types.MediaChunk, <-chan error)
- func (e *AudioEncoder) ValidateConfig(config *types.StreamingMediaConfig) error
- type ErrorResponse
- type GeminiAPIError
- type GeminiProvider
- func (p *GeminiProvider) CalculateCost(tokensIn, tokensOut, cachedTokens int) types.CostInfo
- func (p *GeminiProvider) CreateStreamSession(ctx context.Context, req *providers.StreamInputRequest) (providers.StreamInputSession, error)
- func (p *GeminiProvider) GetMultimodalCapabilities() providers.MultimodalCapabilities
- func (p *GeminiProvider) GetStreamingCapabilities() providers.StreamingCapabilities
- func (p *GeminiProvider) Predict(ctx context.Context, req providers.PredictionRequest) (providers.PredictionResponse, error)
- func (p *GeminiProvider) PredictMultimodal(ctx context.Context, req providers.PredictionRequest) (providers.PredictionResponse, error)
- func (p *GeminiProvider) PredictMultimodalStream(ctx context.Context, req providers.PredictionRequest) (<-chan providers.StreamChunk, error)
- func (p *GeminiProvider) PredictStream(ctx context.Context, req providers.PredictionRequest) (<-chan providers.StreamChunk, error)
- func (p *GeminiProvider) SupportsStreamInput() []string
- type GeminiStreamSession
- func (s *GeminiStreamSession) Close() error
- func (s *GeminiStreamSession) CompleteTurn(ctx context.Context) error
- func (s *GeminiStreamSession) Done() <-chan struct{}
- func (s *GeminiStreamSession) Error() error
- func (s *GeminiStreamSession) Response() <-chan providers.StreamChunk
- func (s *GeminiStreamSession) SendChunk(ctx context.Context, chunk *types.MediaChunk) error
- func (s *GeminiStreamSession) SendText(ctx context.Context, text string) error
- type GeminiToolProvider
- type InlineData
- type ModelTurn
- type Part
- type PromptFeedback
- type RecoveryStrategy
- type SafetyRating
- type ServerContent
- type ServerMessage
- type SetupComplete
- type StreamSessionConfig
- type WebSocketManager
- func (wm *WebSocketManager) Close() error
- func (wm *WebSocketManager) Connect(ctx context.Context) error
- func (wm *WebSocketManager) ConnectWithRetry(ctx context.Context) error
- func (wm *WebSocketManager) IsConnected() bool
- func (wm *WebSocketManager) Receive(ctx context.Context, v interface{}) error
- func (wm *WebSocketManager) Send(msg interface{}) error
- func (wm *WebSocketManager) SendPing() error
- func (wm *WebSocketManager) StartHeartbeat(ctx context.Context, interval time.Duration)
Constants ¶
const (
	// DefaultChunkDuration is 100ms of audio
	DefaultChunkDuration = 100 // milliseconds

	// DefaultChunkSize is the number of bytes for 100ms at 16kHz 16-bit mono
	// 16000 Hz * 0.1 sec * 2 bytes/sample = 3200 bytes
	DefaultChunkSize = (geminiSampleRate * DefaultChunkDuration / 1000) * bytesPerSample
)
const (
	ErrNotConnected  = "not connected"
	ErrManagerClosed = "manager is closed"
)
Common error messages
const (
ErrSessionClosed = "session is closed"
)
Common error messages
Variables ¶
var (
	// ErrInvalidSampleRate indicates an unsupported sample rate
	ErrInvalidSampleRate = errors.New("invalid sample rate: must be 16000 Hz")
	// ErrInvalidChannels indicates an unsupported channel count
	ErrInvalidChannels = errors.New("invalid channels: must be mono (1 channel)")
	// ErrInvalidBitDepth indicates an unsupported bit depth
	ErrInvalidBitDepth = errors.New("invalid bit depth: must be 16 bits")
	// ErrInvalidChunkSize indicates chunk size is not aligned
	ErrInvalidChunkSize = errors.New("invalid chunk size: must be multiple of sample size")
	// ErrEmptyAudioData indicates no audio data provided
	ErrEmptyAudioData = errors.New("empty audio data")
)
var (
	// ErrInvalidAudioFormat indicates audio format doesn't meet Gemini requirements
	ErrInvalidAudioFormat = errors.New("invalid audio format")
	// ErrRateLimitExceeded indicates too many requests
	ErrRateLimitExceeded = errors.New("rate limit exceeded")
	// ErrAuthenticationFailed indicates invalid API key
	ErrAuthenticationFailed = errors.New("authentication failed")
	// ErrServiceUnavailable indicates the service is unavailable
	ErrServiceUnavailable = errors.New("service unavailable")
	// ErrPolicyViolation indicates content policy violation
	ErrPolicyViolation = errors.New("policy violation")
	// ErrInvalidRequest indicates malformed request
	ErrInvalidRequest = errors.New("invalid request")
)
Common errors for Gemini streaming
Functions ¶
func ClassifyError ¶
func ClassifyError(apiErr *GeminiAPIError) error
ClassifyError converts an API error code to a standard error
Types ¶
type AudioEncoder ¶
type AudioEncoder struct {
// contains filtered or unexported fields
}
AudioEncoder handles PCM Linear16 audio encoding for Gemini Live API
func NewAudioEncoder ¶
func NewAudioEncoder() *AudioEncoder
NewAudioEncoder creates a new audio encoder with Gemini Live API specifications
func NewAudioEncoderWithChunkSize ¶
func NewAudioEncoderWithChunkSize(chunkSize int) (*AudioEncoder, error)
NewAudioEncoderWithChunkSize creates an encoder with custom chunk size
func (*AudioEncoder) AssembleChunks ¶
func (e *AudioEncoder) AssembleChunks(chunks []*types.MediaChunk) ([]byte, error)
func (*AudioEncoder) ConvertInt16ToPCM ¶
func (e *AudioEncoder) ConvertInt16ToPCM(samples []int16) []byte
ConvertInt16ToPCM converts []int16 samples to PCM bytes (little-endian)
func (*AudioEncoder) ConvertPCMToInt16 ¶
func (e *AudioEncoder) ConvertPCMToInt16(pcmData []byte) ([]int16, error)
ConvertPCMToInt16 converts PCM bytes to []int16 samples (little-endian)
func (*AudioEncoder) CreateChunks ¶
func (e *AudioEncoder) CreateChunks(ctx context.Context, pcmData []byte) ([]*types.MediaChunk, error)
CreateChunks splits PCM audio data into appropriately sized chunks
func (*AudioEncoder) DecodePCM ¶
func (e *AudioEncoder) DecodePCM(base64Data string) ([]byte, error)
DecodePCM decodes base64-encoded audio data back to raw PCM
func (*AudioEncoder) EncodePCM ¶
func (e *AudioEncoder) EncodePCM(pcmData []byte) (string, error)
EncodePCM encodes raw PCM audio data to base64 for WebSocket transmission
func (*AudioEncoder) GenerateSineWave ¶
func (e *AudioEncoder) GenerateSineWave(frequency float64, durationMs int, amplitude float64) []byte
GenerateSineWave generates PCM audio for a sine wave (useful for testing)
func (*AudioEncoder) GetChunkDurationMs ¶
func (e *AudioEncoder) GetChunkDurationMs(chunkSize int) float64
GetChunkDurationMs calculates the duration of a chunk in milliseconds
func (*AudioEncoder) GetChunkSize ¶
func (e *AudioEncoder) GetChunkSize() int
GetChunkSize returns the configured chunk size in bytes
func (*AudioEncoder) GetSampleRate ¶
func (e *AudioEncoder) GetSampleRate() int
GetSampleRate returns the configured sample rate
func (*AudioEncoder) ReadChunks ¶
func (e *AudioEncoder) ReadChunks(ctx context.Context, reader io.Reader) (<-chan *types.MediaChunk, <-chan error)
ReadChunks reads audio from an io.Reader and creates chunks on-the-fly
func (*AudioEncoder) ValidateConfig ¶
func (e *AudioEncoder) ValidateConfig(config *types.StreamingMediaConfig) error
ValidateConfig validates audio configuration against Gemini requirements
type ErrorResponse ¶
type ErrorResponse struct {
Error *GeminiAPIError `json:"error"`
}
ErrorResponse wraps a GeminiAPIError in a message format
type GeminiAPIError ¶
type GeminiAPIError struct {
Code int `json:"code"`
Message string `json:"message"`
Status string `json:"status"`
}
GeminiAPIError represents an error from the Gemini API
func (*GeminiAPIError) Error ¶
func (e *GeminiAPIError) Error() string
Error implements the error interface
func (*GeminiAPIError) IsAuthError ¶
func (e *GeminiAPIError) IsAuthError() bool
IsAuthError returns true if the error is authentication-related
func (*GeminiAPIError) IsPolicyViolation ¶
func (e *GeminiAPIError) IsPolicyViolation() bool
IsPolicyViolation returns true if the error is a content policy violation
func (*GeminiAPIError) IsRetryable ¶
func (e *GeminiAPIError) IsRetryable() bool
IsRetryable returns true if the error can be retried
type GeminiProvider ¶
type GeminiProvider struct {
providers.BaseProvider
Model string
BaseURL string
ApiKey string
Defaults providers.ProviderDefaults
}
GeminiProvider implements the Provider interface for Google Gemini
func NewGeminiProvider ¶
func NewGeminiProvider(id, model, baseURL string, defaults providers.ProviderDefaults, includeRawOutput bool) *GeminiProvider
NewGeminiProvider creates a new Gemini provider
func (*GeminiProvider) CalculateCost ¶
func (p *GeminiProvider) CalculateCost(tokensIn, tokensOut, cachedTokens int) types.CostInfo
CalculateCost calculates detailed cost breakdown including optional cached tokens
func (*GeminiProvider) CreateStreamSession ¶
func (p *GeminiProvider) CreateStreamSession(ctx context.Context, req *providers.StreamInputRequest) (providers.StreamInputSession, error)
CreateStreamSession creates a new bidirectional streaming session with Gemini Live API
Response Modalities: By default, the session is configured to return TEXT responses only. To request audio responses, pass "response_modalities" in the request metadata:
req := providers.StreamInputRequest{
Config: config,
Metadata: map[string]interface{}{
"response_modalities": []string{"AUDIO"}, // Audio only
// OR, to receive both text and audio, use this entry instead:
// "response_modalities": []string{"TEXT", "AUDIO"},
},
}
Audio responses will be delivered in the StreamChunk.Metadata["audio_data"] field as base64-encoded PCM.
func (*GeminiProvider) GetMultimodalCapabilities ¶
func (p *GeminiProvider) GetMultimodalCapabilities() providers.MultimodalCapabilities
GetMultimodalCapabilities returns Gemini's multimodal support capabilities
func (*GeminiProvider) GetStreamingCapabilities ¶
func (p *GeminiProvider) GetStreamingCapabilities() providers.StreamingCapabilities
GetStreamingCapabilities returns detailed information about Gemini's streaming support
func (*GeminiProvider) Predict ¶
func (p *GeminiProvider) Predict(ctx context.Context, req providers.PredictionRequest) (providers.PredictionResponse, error)
Predict sends a predict request to Gemini
func (*GeminiProvider) PredictMultimodal ¶
func (p *GeminiProvider) PredictMultimodal(ctx context.Context, req providers.PredictionRequest) (providers.PredictionResponse, error)
PredictMultimodal performs a predict request with multimodal content
func (*GeminiProvider) PredictMultimodalStream ¶
func (p *GeminiProvider) PredictMultimodalStream(ctx context.Context, req providers.PredictionRequest) (<-chan providers.StreamChunk, error)
PredictMultimodalStream performs a streaming predict request with multimodal content
func (*GeminiProvider) PredictStream ¶
func (p *GeminiProvider) PredictStream(ctx context.Context, req providers.PredictionRequest) (<-chan providers.StreamChunk, error)
PredictStream streams a predict response from Gemini
func (*GeminiProvider) SupportsStreamInput ¶
func (p *GeminiProvider) SupportsStreamInput() []string
SupportsStreamInput returns the media types supported for streaming input
type GeminiStreamSession ¶
type GeminiStreamSession struct {
// contains filtered or unexported fields
}
GeminiStreamSession implements StreamInputSession for Gemini Live API
func NewGeminiStreamSession ¶
func NewGeminiStreamSession(ctx context.Context, wsURL, apiKey string, config StreamSessionConfig) (*GeminiStreamSession, error)
NewGeminiStreamSession creates a new streaming session
func (*GeminiStreamSession) Close ¶
func (s *GeminiStreamSession) Close() error
Close closes the session
func (*GeminiStreamSession) CompleteTurn ¶
func (s *GeminiStreamSession) CompleteTurn(ctx context.Context) error
CompleteTurn signals that the current turn is complete
func (*GeminiStreamSession) Done ¶
func (s *GeminiStreamSession) Done() <-chan struct{}
Done returns a channel that's closed when the session ends
func (*GeminiStreamSession) Error ¶
func (s *GeminiStreamSession) Error() error
Error returns the error that caused the session to close
func (*GeminiStreamSession) Response ¶
func (s *GeminiStreamSession) Response() <-chan providers.StreamChunk
Response returns the channel for receiving responses
func (*GeminiStreamSession) SendChunk ¶
func (s *GeminiStreamSession) SendChunk(ctx context.Context, chunk *types.MediaChunk) error
SendChunk sends a media chunk to the server
type GeminiToolProvider ¶
type GeminiToolProvider struct {
*GeminiProvider
// contains filtered or unexported fields
}
GeminiToolProvider extends GeminiProvider with tool support
func NewGeminiToolProvider ¶
func NewGeminiToolProvider(id, model, baseURL string, defaults providers.ProviderDefaults, includeRawOutput bool) *GeminiToolProvider
NewGeminiToolProvider creates a new Gemini provider with tool support
func (*GeminiToolProvider) BuildTooling ¶
func (p *GeminiToolProvider) BuildTooling(descriptors []*providers.ToolDescriptor) (interface{}, error)
BuildTooling converts tool descriptors to Gemini format
func (*GeminiToolProvider) PredictWithTools ¶
func (p *GeminiToolProvider) PredictWithTools(ctx context.Context, req providers.PredictionRequest, tools interface{}, toolChoice string) (providers.PredictionResponse, []types.MessageToolCall, error)
PredictWithTools performs a predict request with tool support
type InlineData ¶
type InlineData struct {
MimeType string `json:"mimeType,omitempty"` // camelCase!
Data string `json:"data,omitempty"` // Base64 encoded
}
InlineData represents inline media data
type ModelTurn ¶
type ModelTurn struct {
Parts []Part `json:"parts,omitempty"`
}
ModelTurn represents a model response turn
type Part ¶
type Part struct {
Text string `json:"text,omitempty"`
InlineData *InlineData `json:"inlineData,omitempty"` // camelCase!
}
Part represents a content part (text or inline data)
type PromptFeedback ¶
type PromptFeedback struct {
SafetyRatings []SafetyRating `json:"safetyRatings,omitempty"`
BlockReason string `json:"blockReason,omitempty"`
}
PromptFeedback contains safety ratings and block reason
func (*PromptFeedback) GetBlockReason ¶
func (f *PromptFeedback) GetBlockReason() string
GetBlockReason returns a human-readable block reason
func (*PromptFeedback) IsBlocked ¶
func (f *PromptFeedback) IsBlocked() bool
IsBlocked returns true if content was blocked by safety filters
type RecoveryStrategy ¶
type RecoveryStrategy int
RecoveryStrategy defines how to handle different error types
const (
	// RecoveryRetry indicates the operation should be retried
	RecoveryRetry RecoveryStrategy = iota
	// RecoveryFailFast indicates the operation should fail immediately
	RecoveryFailFast
	// RecoveryGracefulDegradation indicates fallback to a simpler mode
	RecoveryGracefulDegradation
	// RecoveryWaitAndRetry indicates retry after a delay
	RecoveryWaitAndRetry
)
func DetermineRecoveryStrategy ¶
func DetermineRecoveryStrategy(err error) RecoveryStrategy
DetermineRecoveryStrategy determines how to handle an error
type SafetyRating ¶
type SafetyRating struct {
Category string `json:"category"`
Probability string `json:"probability"`
}
SafetyRating represents content safety assessment
type ServerContent ¶
type ServerContent struct {
ModelTurn *ModelTurn `json:"modelTurn,omitempty"`
TurnComplete bool `json:"turnComplete,omitempty"`
Interrupted bool `json:"interrupted,omitempty"`
}
ServerContent represents the server content
type ServerMessage ¶
type ServerMessage struct {
SetupComplete *SetupComplete `json:"setupComplete,omitempty"`
ServerContent *ServerContent `json:"serverContent,omitempty"`
}
ServerMessage represents a message from the Gemini server
func (*ServerMessage) UnmarshalJSON ¶
func (s *ServerMessage) UnmarshalJSON(data []byte) error
UnmarshalJSON implements json.Unmarshaler, providing custom JSON decoding for ServerMessage
type SetupComplete ¶
type SetupComplete struct{}
SetupComplete indicates setup is complete (empty object per docs)
type StreamSessionConfig ¶
type StreamSessionConfig struct {
Model string // Model name (will be prefixed with "models/" automatically)
ResponseModalities []string // "TEXT" and/or "AUDIO"
}
StreamSessionConfig configures a streaming session
type WebSocketManager ¶
type WebSocketManager struct {
// contains filtered or unexported fields
}
WebSocketManager manages a WebSocket connection with reconnection logic.
func NewWebSocketManager ¶
func NewWebSocketManager(url, apiKey string) *WebSocketManager
NewWebSocketManager creates a new WebSocket manager
func (*WebSocketManager) Close ¶
func (wm *WebSocketManager) Close() error
Close gracefully closes the WebSocket connection
func (*WebSocketManager) Connect ¶
func (wm *WebSocketManager) Connect(ctx context.Context) error
Connect establishes a WebSocket connection to the Gemini Live API
func (*WebSocketManager) ConnectWithRetry ¶
func (wm *WebSocketManager) ConnectWithRetry(ctx context.Context) error
ConnectWithRetry connects with exponential backoff retry logic
func (*WebSocketManager) IsConnected ¶
func (wm *WebSocketManager) IsConnected() bool
IsConnected returns true if the WebSocket is connected
func (*WebSocketManager) Receive ¶
func (wm *WebSocketManager) Receive(ctx context.Context, v interface{}) error
Receive reads a message from the WebSocket
func (*WebSocketManager) Send ¶
func (wm *WebSocketManager) Send(msg interface{}) error
Send sends a message through the WebSocket
func (*WebSocketManager) SendPing ¶
func (wm *WebSocketManager) SendPing() error
SendPing sends a WebSocket ping to keep the connection alive
func (*WebSocketManager) StartHeartbeat ¶
func (wm *WebSocketManager) StartHeartbeat(ctx context.Context, interval time.Duration)
StartHeartbeat starts a goroutine that sends periodic pings