gemini

package
v1.1.0 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Nov 17, 2025 License: Apache-2.0 Imports: 19 Imported by: 0

Documentation

Index

Constants

View Source
const (

	// DefaultChunkDuration is 100ms of audio
	DefaultChunkDuration = 100 // milliseconds
	// DefaultChunkSize is the number of bytes for 100ms at 16kHz 16-bit mono
	// 16000 Hz * 0.1 sec * 2 bytes/sample = 3200 bytes
	DefaultChunkSize = (geminiSampleRate * DefaultChunkDuration / 1000) * bytesPerSample
)
View Source
const (
	ErrNotConnected  = "not connected"
	ErrManagerClosed = "manager is closed"
)

Common error messages

View Source
const (
	ErrSessionClosed = "session is closed"
)

Common error messages

Variables

View Source
var (
	// ErrInvalidSampleRate indicates an unsupported sample rate
	ErrInvalidSampleRate = errors.New("invalid sample rate: must be 16000 Hz")
	// ErrInvalidChannels indicates an unsupported channel count
	ErrInvalidChannels = errors.New("invalid channels: must be mono (1 channel)")
	// ErrInvalidBitDepth indicates an unsupported bit depth
	ErrInvalidBitDepth = errors.New("invalid bit depth: must be 16 bits")
	// ErrInvalidChunkSize indicates chunk size is not aligned
	ErrInvalidChunkSize = errors.New("invalid chunk size: must be multiple of sample size")
	// ErrEmptyAudioData indicates no audio data provided
	ErrEmptyAudioData = errors.New("empty audio data")
)
View Source
var (
	// ErrInvalidAudioFormat indicates audio format doesn't meet Gemini requirements
	ErrInvalidAudioFormat = errors.New("invalid audio format")

	// ErrRateLimitExceeded indicates too many requests
	ErrRateLimitExceeded = errors.New("rate limit exceeded")

	// ErrAuthenticationFailed indicates invalid API key
	ErrAuthenticationFailed = errors.New("authentication failed")

	// ErrServiceUnavailable indicates temporary service issue
	ErrServiceUnavailable = errors.New("service unavailable")

	// ErrPolicyViolation indicates content policy violation
	ErrPolicyViolation = errors.New("policy violation")

	// ErrInvalidRequest indicates malformed request
	ErrInvalidRequest = errors.New("invalid request")
)

Common errors for Gemini streaming

Functions

func ClassifyError

func ClassifyError(apiErr *GeminiAPIError) error

ClassifyError converts an API error code to a standard error

Types

type AudioEncoder

type AudioEncoder struct {
	// contains filtered or unexported fields
}

AudioEncoder handles PCM Linear16 audio encoding for Gemini Live API

func NewAudioEncoder

func NewAudioEncoder() *AudioEncoder

NewAudioEncoder creates a new audio encoder with Gemini Live API specifications

func NewAudioEncoderWithChunkSize

func NewAudioEncoderWithChunkSize(chunkSize int) (*AudioEncoder, error)

NewAudioEncoderWithChunkSize creates an encoder with custom chunk size

func (*AudioEncoder) AssembleChunks

func (e *AudioEncoder) AssembleChunks(chunks []*types.MediaChunk) ([]byte, error)

func (*AudioEncoder) ConvertInt16ToPCM

func (e *AudioEncoder) ConvertInt16ToPCM(samples []int16) []byte

ConvertInt16ToPCM converts []int16 samples to PCM bytes (little-endian)

func (*AudioEncoder) ConvertPCMToInt16

func (e *AudioEncoder) ConvertPCMToInt16(pcmData []byte) ([]int16, error)

ConvertPCMToInt16 converts PCM bytes to []int16 samples (little-endian)

func (*AudioEncoder) CreateChunks

func (e *AudioEncoder) CreateChunks(ctx context.Context, pcmData []byte) ([]*types.MediaChunk, error)

CreateChunks splits PCM audio data into appropriately sized chunks

func (*AudioEncoder) DecodePCM

func (e *AudioEncoder) DecodePCM(base64Data string) ([]byte, error)

DecodePCM decodes base64-encoded audio data back to raw PCM

func (*AudioEncoder) EncodePCM

func (e *AudioEncoder) EncodePCM(pcmData []byte) (string, error)

EncodePCM encodes raw PCM audio data to base64 for WebSocket transmission

func (*AudioEncoder) GenerateSineWave

func (e *AudioEncoder) GenerateSineWave(frequency float64, durationMs int, amplitude float64) []byte

GenerateSineWave generates PCM audio for a sine wave (useful for testing)

func (*AudioEncoder) GetChunkDurationMs

func (e *AudioEncoder) GetChunkDurationMs(chunkSize int) float64

GetChunkDurationMs calculates the duration of a chunk in milliseconds

func (*AudioEncoder) GetChunkSize

func (e *AudioEncoder) GetChunkSize() int

GetChunkSize returns the configured chunk size in bytes

func (*AudioEncoder) GetSampleRate

func (e *AudioEncoder) GetSampleRate() int

GetSampleRate returns the configured sample rate

func (*AudioEncoder) ReadChunks

func (e *AudioEncoder) ReadChunks(ctx context.Context, reader io.Reader) (<-chan *types.MediaChunk, <-chan error)

ReadChunks reads audio from an io.Reader and creates chunks on-the-fly

func (*AudioEncoder) ValidateConfig

func (e *AudioEncoder) ValidateConfig(config *types.StreamingMediaConfig) error

ValidateConfig validates audio configuration against Gemini requirements

type ErrorResponse

type ErrorResponse struct {
	Error *GeminiAPIError `json:"error"`
}

ErrorResponse wraps a GeminiAPIError in a message format

type GeminiAPIError

type GeminiAPIError struct {
	Code    int    `json:"code"`
	Message string `json:"message"`
	Status  string `json:"status"`
}

GeminiAPIError represents an error from the Gemini API

func (*GeminiAPIError) Error

func (e *GeminiAPIError) Error() string

Error implements the error interface

func (*GeminiAPIError) IsAuthError

func (e *GeminiAPIError) IsAuthError() bool

IsAuthError returns true if the error is authentication-related

func (*GeminiAPIError) IsPolicyViolation

func (e *GeminiAPIError) IsPolicyViolation() bool

IsPolicyViolation returns true if the error is a content policy violation

func (*GeminiAPIError) IsRetryable

func (e *GeminiAPIError) IsRetryable() bool

IsRetryable returns true if the error can be retried

type GeminiProvider

type GeminiProvider struct {
	providers.BaseProvider
	Model    string
	BaseURL  string
	ApiKey   string
	Defaults providers.ProviderDefaults
}

GeminiProvider implements the Provider interface for Google Gemini

func NewGeminiProvider

func NewGeminiProvider(id, model, baseURL string, defaults providers.ProviderDefaults, includeRawOutput bool) *GeminiProvider

NewGeminiProvider creates a new Gemini provider

func (*GeminiProvider) CalculateCost

func (p *GeminiProvider) CalculateCost(tokensIn, tokensOut, cachedTokens int) types.CostInfo

CalculateCost calculates detailed cost breakdown including optional cached tokens

func (*GeminiProvider) CreateStreamSession

CreateStreamSession creates a new bidirectional streaming session with Gemini Live API

Response Modalities: By default, the session is configured to return TEXT responses only. To request audio responses, pass "response_modalities" in the request metadata:

req := providers.StreamInputRequest{
    Config: config,
    Metadata: map[string]interface{}{
        "response_modalities": []string{"AUDIO"},        // Audio only
        // OR, to receive both text and audio, use this value instead:
        // "response_modalities": []string{"TEXT", "AUDIO"},
    },
}

Audio responses will be delivered in the StreamChunk.Metadata["audio_data"] field as base64-encoded PCM.

func (*GeminiProvider) GetMultimodalCapabilities

func (p *GeminiProvider) GetMultimodalCapabilities() providers.MultimodalCapabilities

GetMultimodalCapabilities returns Gemini's multimodal support capabilities

func (*GeminiProvider) GetStreamingCapabilities

func (p *GeminiProvider) GetStreamingCapabilities() providers.StreamingCapabilities

GetStreamingCapabilities returns detailed information about Gemini's streaming support

func (*GeminiProvider) Predict

Predict sends a predict request to Gemini

func (*GeminiProvider) PredictMultimodal

PredictMultimodal performs a predict request with multimodal content

func (*GeminiProvider) PredictMultimodalStream

func (p *GeminiProvider) PredictMultimodalStream(ctx context.Context, req providers.PredictionRequest) (<-chan providers.StreamChunk, error)

PredictMultimodalStream performs a streaming predict request with multimodal content

func (*GeminiProvider) PredictStream

func (p *GeminiProvider) PredictStream(ctx context.Context, req providers.PredictionRequest) (<-chan providers.StreamChunk, error)

PredictStream streams a predict response from Gemini

func (*GeminiProvider) SupportsStreamInput

func (p *GeminiProvider) SupportsStreamInput() []string

SupportsStreamInput returns the media types supported for streaming input

type GeminiStreamSession

type GeminiStreamSession struct {
	// contains filtered or unexported fields
}

GeminiStreamSession implements StreamInputSession for Gemini Live API

func NewGeminiStreamSession

func NewGeminiStreamSession(ctx context.Context, wsURL, apiKey string, config StreamSessionConfig) (*GeminiStreamSession, error)

NewGeminiStreamSession creates a new streaming session

func (*GeminiStreamSession) Close

func (s *GeminiStreamSession) Close() error

Close closes the session

func (*GeminiStreamSession) CompleteTurn

func (s *GeminiStreamSession) CompleteTurn(ctx context.Context) error

CompleteTurn signals that the current turn is complete

func (*GeminiStreamSession) Done

func (s *GeminiStreamSession) Done() <-chan struct{}

Done returns a channel that's closed when the session ends

func (*GeminiStreamSession) Error

func (s *GeminiStreamSession) Error() error

Error returns the error that caused the session to close

func (*GeminiStreamSession) Response

func (s *GeminiStreamSession) Response() <-chan providers.StreamChunk

Response returns the channel for receiving responses

func (*GeminiStreamSession) SendChunk

func (s *GeminiStreamSession) SendChunk(ctx context.Context, chunk *types.MediaChunk) error

SendChunk sends a media chunk to the server

func (*GeminiStreamSession) SendText

func (s *GeminiStreamSession) SendText(ctx context.Context, text string) error

SendText sends a text message to the server and marks the turn as complete

type GeminiToolProvider

type GeminiToolProvider struct {
	*GeminiProvider
	// contains filtered or unexported fields
}

GeminiToolProvider extends GeminiProvider with tool support

func NewGeminiToolProvider

func NewGeminiToolProvider(id, model, baseURL string, defaults providers.ProviderDefaults, includeRawOutput bool) *GeminiToolProvider

NewGeminiToolProvider creates a new Gemini provider with tool support

func (*GeminiToolProvider) BuildTooling

func (p *GeminiToolProvider) BuildTooling(descriptors []*providers.ToolDescriptor) (interface{}, error)

BuildTooling converts tool descriptors to Gemini format

func (*GeminiToolProvider) PredictWithTools

func (p *GeminiToolProvider) PredictWithTools(ctx context.Context, req providers.PredictionRequest, tools interface{}, toolChoice string) (providers.PredictionResponse, []types.MessageToolCall, error)

PredictWithTools performs a predict request with tool support

type InlineData

type InlineData struct {
	MimeType string `json:"mimeType,omitempty"` // camelCase!
	Data     string `json:"data,omitempty"`     // Base64 encoded
}

InlineData represents inline media data

type ModelTurn

type ModelTurn struct {
	Parts []Part `json:"parts,omitempty"`
}

ModelTurn represents a model response turn

type Part

type Part struct {
	Text       string      `json:"text,omitempty"`
	InlineData *InlineData `json:"inlineData,omitempty"` // camelCase!
}

Part represents a content part (text or inline data)

type PromptFeedback

type PromptFeedback struct {
	SafetyRatings []SafetyRating `json:"safetyRatings,omitempty"`
	BlockReason   string         `json:"blockReason,omitempty"`
}

PromptFeedback contains safety ratings and block reason

func (*PromptFeedback) GetBlockReason

func (f *PromptFeedback) GetBlockReason() string

GetBlockReason returns a human-readable block reason

func (*PromptFeedback) IsBlocked

func (f *PromptFeedback) IsBlocked() bool

IsBlocked returns true if content was blocked by safety filters

type RecoveryStrategy

type RecoveryStrategy int

RecoveryStrategy defines how to handle different error types

const (
	// RecoveryRetry indicates the operation should be retried
	RecoveryRetry RecoveryStrategy = iota

	// RecoveryFailFast indicates the operation should fail immediately
	RecoveryFailFast

	// RecoveryGracefulDegradation indicates fallback to a simpler mode
	RecoveryGracefulDegradation

	// RecoveryWaitAndRetry indicates retry after a delay
	RecoveryWaitAndRetry
)

func DetermineRecoveryStrategy

func DetermineRecoveryStrategy(err error) RecoveryStrategy

DetermineRecoveryStrategy determines how to handle an error

type SafetyRating

type SafetyRating struct {
	Category    string `json:"category"`
	Probability string `json:"probability"`
}

SafetyRating represents content safety assessment

type ServerContent

type ServerContent struct {
	ModelTurn    *ModelTurn `json:"modelTurn,omitempty"`
	TurnComplete bool       `json:"turnComplete,omitempty"`
	Interrupted  bool       `json:"interrupted,omitempty"`
}

ServerContent represents the server content

type ServerMessage

type ServerMessage struct {
	SetupComplete *SetupComplete `json:"setupComplete,omitempty"`
	ServerContent *ServerContent `json:"serverContent,omitempty"`
}

ServerMessage represents a message from the Gemini server

func (*ServerMessage) UnmarshalJSON

func (s *ServerMessage) UnmarshalJSON(data []byte) error

UnmarshalJSON implements json.Unmarshaler, decoding a ServerMessage from JSON data

type SetupComplete

type SetupComplete struct{}

SetupComplete indicates setup is complete (empty object per docs)

type StreamSessionConfig

type StreamSessionConfig struct {
	Model              string   // Model name (will be prefixed with "models/" automatically)
	ResponseModalities []string // "TEXT" and/or "AUDIO"
}

StreamSessionConfig configures a streaming session

type WebSocketManager

type WebSocketManager struct {
	// contains filtered or unexported fields
}

WebSocketManager manages a WebSocket connection with reconnection logic.

func NewWebSocketManager

func NewWebSocketManager(url, apiKey string) *WebSocketManager

NewWebSocketManager creates a new WebSocket manager

func (*WebSocketManager) Close

func (wm *WebSocketManager) Close() error

Close gracefully closes the WebSocket connection

func (*WebSocketManager) Connect

func (wm *WebSocketManager) Connect(ctx context.Context) error

Connect establishes a WebSocket connection to the Gemini Live API

func (*WebSocketManager) ConnectWithRetry

func (wm *WebSocketManager) ConnectWithRetry(ctx context.Context) error

ConnectWithRetry connects with exponential backoff retry logic

func (*WebSocketManager) IsConnected

func (wm *WebSocketManager) IsConnected() bool

IsConnected returns true if the WebSocket is connected

func (*WebSocketManager) Receive

func (wm *WebSocketManager) Receive(ctx context.Context, v interface{}) error

Receive reads a message from the WebSocket

func (*WebSocketManager) Send

func (wm *WebSocketManager) Send(msg interface{}) error

Send sends a message through the WebSocket

func (*WebSocketManager) SendPing

func (wm *WebSocketManager) SendPing() error

SendPing sends a WebSocket ping to keep the connection alive

func (*WebSocketManager) StartHeartbeat

func (wm *WebSocketManager) StartHeartbeat(ctx context.Context, interval time.Duration)

StartHeartbeat starts a goroutine that sends periodic pings

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL