gemini

package
v0.2.49 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: May 2, 2026 License: MIT Imports: 14 Imported by: 1

Documentation

Index

Constants

View Source
const (
	// Stable, generally-available models.
	ModelGemini25Pro       = "gemini-2.5-pro"
	ModelGemini25Flash     = "gemini-2.5-flash"
	ModelGemini25FlashLite = "gemini-2.5-flash-lite"
	ModelGemini20Flash     = "gemini-2.0-flash"
	ModelGemini20FlashLite = "gemini-2.0-flash-lite"
	ModelGemini15Pro       = "gemini-1.5-pro"
	ModelGemini15Flash     = "gemini-1.5-flash"
	ModelGemini15Flash8B   = "gemini-1.5-flash-8b"

	// Preview/Experimental models. These identifiers may be renamed or
	// retired by the Gemini API without a major version bump — verify
	// availability against the current Gemini model list before relying
	// on them in production.
	ModelGeminiLive25FlashPreview            = "gemini-live-2.5-flash-preview"
	ModelGemini25FlashPreviewNativeAudio     = "gemini-2.5-flash-preview-native-audio-dialog"
	ModelGemini25FlashExpNativeAudioThinking = "gemini-2.5-flash-exp-native-audio-thinking-dialog"
	ModelGemini25FlashPreviewTTS             = "gemini-2.5-flash-preview-tts"
	ModelGemini25ProPreviewTTS               = "gemini-2.5-pro-preview-tts"
	ModelGemini20FlashPreviewImageGen        = "gemini-2.0-flash-preview-image-generation"
	ModelGemini20FlashLive001                = "gemini-2.0-flash-live-001"

	// Image generation models
	ModelGemini25FlashImage = "gemini-2.5-flash-image"

	// Multi-turn image editing models (Nano Banana Pro)
	ModelGemini3ProImagePreview = "gemini-3-pro-image-preview"

	// DefaultModel is the model used when no WithModel option is supplied.
	// NOTE(review): this defaults to a 1.5-generation model while the
	// package exposes 2.x/3.x models — confirm this is still intended.
	DefaultModel = ModelGemini15Flash

	// DefaultImageEditModel is the default for multi-turn image editing
	// sessions (CreateImageEditSession).
	DefaultImageEditModel = ModelGemini3ProImagePreview
)

Model constants for Gemini API

Variables

This section is empty.

Functions

func GetMaxThinkingTokens

func GetMaxThinkingTokens(model string) *int32

GetMaxThinkingTokens returns the maximum thinking tokens for a model

func GetSupportedImageSizes added in v0.2.36

func GetSupportedImageSizes(model string) []string

GetSupportedImageSizes returns the supported image sizes for the model

func GetSupportedOutputFormats added in v0.2.35

func GetSupportedOutputFormats(model string) []string

GetSupportedOutputFormats returns the supported output formats for image generation

func IsAudioModel

func IsAudioModel(model string) bool

IsAudioModel returns true if the model supports audio capabilities

func IsVisionModel

func IsVisionModel(model string) bool

IsVisionModel returns true if the model supports vision capabilities

func SupportsImageGeneration added in v0.2.35

func SupportsImageGeneration(model string) bool

SupportsImageGeneration returns true if the model supports image generation

func SupportsMultiTurnImageEditing added in v0.2.36

func SupportsMultiTurnImageEditing(model string) bool

SupportsMultiTurnImageEditing returns true if the model supports multi-turn image editing

func SupportsThinking

func SupportsThinking(model string) bool

SupportsThinking returns true if the model supports thinking capabilities

func SupportsToolCalling

func SupportsToolCalling(model string) bool

SupportsToolCalling returns true if the model supports function/tool calling

func ValidateThinkingBudget

func ValidateThinkingBudget(model string, budget int32) error

ValidateThinkingBudget validates if a thinking budget is within model limits

func WithReasoning

func WithReasoning(reasoning string) interfaces.GenerateOption

WithReasoning creates a GenerateOption to set the reasoning mode. The reasoning value can be "none" (direct answers), "minimal" (brief explanations), or "comprehensive" (detailed step-by-step reasoning).

func WithResponseFormat

func WithResponseFormat(format interfaces.ResponseFormat) interfaces.GenerateOption

WithResponseFormat creates a GenerateOption to set the response format

func WithStopSequences

func WithStopSequences(stopSequences []string) interfaces.GenerateOption

WithStopSequences creates a GenerateOption to set the stop sequences

func WithSystemMessage

func WithSystemMessage(systemMessage string) interfaces.GenerateOption

WithSystemMessage creates a GenerateOption to set the system message

func WithTemperature

func WithTemperature(temperature float64) interfaces.GenerateOption

WithTemperature creates a GenerateOption to set the temperature

func WithTopP

func WithTopP(topP float64) interfaces.GenerateOption

WithTopP creates a GenerateOption to set the top_p

Types

type GeminiClient

// GeminiClient implements the LLM interface for the Google Gemini API.
// Construct it with NewClient; the zero value is not usable.
type GeminiClient struct {
	// contains filtered or unexported fields
}

GeminiClient implements the LLM interface for Google Gemini API

func NewClient

func NewClient(ctx context.Context, options ...Option) (*GeminiClient, error)

NewClient creates a new Gemini client

func (*GeminiClient) CreateImageEditSession added in v0.2.36

func (c *GeminiClient) CreateImageEditSession(ctx context.Context, options *interfaces.ImageEditSessionOptions) (interfaces.ImageEditSession, error)

CreateImageEditSession creates a new multi-turn image editing session. The session maintains conversation context for iterative image creation and modification.

func (*GeminiClient) Generate

func (c *GeminiClient) Generate(ctx context.Context, prompt string, options ...interfaces.GenerateOption) (string, error)

Generate generates text from a prompt

func (*GeminiClient) GenerateDetailed added in v0.1.13

func (c *GeminiClient) GenerateDetailed(ctx context.Context, prompt string, options ...interfaces.GenerateOption) (*interfaces.LLMResponse, error)

GenerateDetailed generates text and returns detailed response information including token usage

func (*GeminiClient) GenerateImage added in v0.2.35

GenerateImage generates images from a text prompt using Gemini

func (*GeminiClient) GenerateStream

func (c *GeminiClient) GenerateStream(ctx context.Context, prompt string, options ...interfaces.GenerateOption) (<-chan interfaces.StreamEvent, error)

GenerateStream generates text with streaming response using native Gemini streaming

func (*GeminiClient) GenerateWithTools

func (c *GeminiClient) GenerateWithTools(ctx context.Context, prompt string, tools []interfaces.Tool, options ...interfaces.GenerateOption) (string, error)

GenerateWithTools implements interfaces.LLM.GenerateWithTools

func (*GeminiClient) GenerateWithToolsDetailed added in v0.1.13

func (c *GeminiClient) GenerateWithToolsDetailed(ctx context.Context, prompt string, tools []interfaces.Tool, options ...interfaces.GenerateOption) (*interfaces.LLMResponse, error)

GenerateWithToolsDetailed generates text with tools and returns detailed response information including token usage

func (*GeminiClient) GenerateWithToolsStream

func (c *GeminiClient) GenerateWithToolsStream(ctx context.Context, prompt string, tools []interfaces.Tool, options ...interfaces.GenerateOption) (<-chan interfaces.StreamEvent, error)

GenerateWithToolsStream generates text with tools and streaming response with real-time tool events

func (*GeminiClient) GetModel

func (c *GeminiClient) GetModel() string

GetModel returns the model name being used

func (*GeminiClient) Name

func (c *GeminiClient) Name() string

Name implements interfaces.LLM.Name

func (*GeminiClient) SupportedImageFormats added in v0.2.35

func (c *GeminiClient) SupportedImageFormats() []string

SupportedImageFormats returns the supported output formats for the configured model

func (*GeminiClient) SupportsImageGeneration added in v0.2.35

func (c *GeminiClient) SupportsImageGeneration() bool

SupportsImageGeneration returns true if the configured model supports image generation

func (*GeminiClient) SupportsMultiTurnImageEditing added in v0.2.36

func (c *GeminiClient) SupportsMultiTurnImageEditing() bool

SupportsMultiTurnImageEditing returns true if the configured model supports multi-turn conversational image editing.

func (*GeminiClient) SupportsStreaming

func (c *GeminiClient) SupportsStreaming() bool

SupportsStreaming implements interfaces.LLM.SupportsStreaming

type GeminiImageEditSession added in v0.2.36

// GeminiImageEditSession implements interfaces.ImageEditSession using the
// Gemini chat API, maintaining conversation context for iterative image
// creation and modification. Obtain one via GeminiClient.CreateImageEditSession.
type GeminiImageEditSession struct {
	// contains filtered or unexported fields
}

GeminiImageEditSession implements interfaces.ImageEditSession using Gemini chat API. It maintains a conversation context for iterative image creation and modification.

func (*GeminiImageEditSession) Close added in v0.2.36

func (s *GeminiImageEditSession) Close() error

Close closes the session and releases resources. Note: The genai Chat doesn't require explicit cleanup, but we implement this for consistency with the interface contract.

func (*GeminiImageEditSession) GetHistory added in v0.2.36

GetHistory returns the conversation history for this session.

func (*GeminiImageEditSession) SendMessage added in v0.2.36

SendMessage sends a text message and returns the response (text and/or image). The conversation context is automatically maintained by the underlying chat session.

func (*GeminiImageEditSession) SendMessageWithImage added in v0.2.36

SendMessageWithImage sends a message with an image reference for editing. This allows providing an external image for the model to modify.

type HarmCategory

// HarmCategory identifies a category of harmful content for safety filtering.
type HarmCategory string

HarmCategory represents the harm category for safety filtering

// Harm categories accepted by the Gemini safety-settings API. The string
// values mirror the API's HARM_CATEGORY_* enum names and must not be changed.
const (
	HarmCategoryUnspecified      HarmCategory = "HARM_CATEGORY_UNSPECIFIED"
	HarmCategoryDerogatory       HarmCategory = "HARM_CATEGORY_DEROGATORY"
	HarmCategoryToxicity         HarmCategory = "HARM_CATEGORY_TOXICITY"
	HarmCategoryViolence         HarmCategory = "HARM_CATEGORY_VIOLENCE"
	HarmCategorySexual           HarmCategory = "HARM_CATEGORY_SEXUAL"
	HarmCategoryMedical          HarmCategory = "HARM_CATEGORY_MEDICAL"
	HarmCategoryDangerous        HarmCategory = "HARM_CATEGORY_DANGEROUS"
	HarmCategoryHarassment       HarmCategory = "HARM_CATEGORY_HARASSMENT"
	HarmCategoryHateSpeech       HarmCategory = "HARM_CATEGORY_HATE_SPEECH"
	HarmCategorySexuallyExplicit HarmCategory = "HARM_CATEGORY_SEXUALLY_EXPLICIT"
	HarmCategoryDangerousContent HarmCategory = "HARM_CATEGORY_DANGEROUS_CONTENT"
)

type ModelCapabilities

// ModelCapabilities describes the feature set and limits of a Gemini model.
// Query it with GetModelCapabilities.
type ModelCapabilities struct {
	SupportsStreaming             bool   // Whether the model supports streaming responses
	SupportsToolCalling           bool   // Whether the model supports function/tool calling
	SupportsVision                bool   // Whether the model accepts image input
	SupportsAudio                 bool   // Whether the model supports audio capabilities
	SupportsThinking              bool   // Whether the model supports thinking/reasoning tokens
	SupportsImageGeneration       bool   // Whether the model can generate images
	SupportsMultiTurnImageEditing bool   // Whether the model supports conversational image editing
	MaxInputTokens                int    // Maximum input (prompt) tokens
	MaxOutputTokens               int    // Maximum output tokens
	MaxThinkingTokens             *int32 // nil if thinking not supported
	MaxReferenceImages            int    // Max reference images for multi-turn editing (Gemini 3 Pro: 14)
	MaxObjectImages               int    // Max high-fidelity object images (Gemini 3 Pro: 6)
	MaxHumanImages                int    // Max human images for character consistency (Gemini 3 Pro: 5)
	SupportedMimeTypes            []string // MIME types accepted as input
	SupportedOutputFormats        []string // Output formats for image generation (e.g., "png", "jpeg")
	SupportedImageSizes           []string // Supported image sizes (e.g., "1K", "2K", "4K")
}

ModelCapabilities represents the capabilities of different Gemini models

func GetModelCapabilities

func GetModelCapabilities(model string) ModelCapabilities

GetModelCapabilities returns the capabilities for a given model

type Option

// Option configures a GeminiClient; pass Options to NewClient.
type Option func(*GeminiClient)

Option represents an option for configuring the Gemini client

func WithAPIKey added in v0.0.43

func WithAPIKey(apiKey string) Option

WithAPIKey sets the API key for Gemini API backend

func WithBackend added in v0.0.43

func WithBackend(backend genai.Backend) Option

WithBackend sets the backend for the Gemini client

func WithBaseURL

func WithBaseURL(baseURL string) Option

WithBaseURL sets the base URL for the Gemini client (not used with genai package)

func WithClient added in v0.0.43

func WithClient(existing *genai.Client) Option

WithClient injects an already initialized genai.Client. If set, NewClient won't build a new client

func WithCredentialsFile added in v0.0.43

func WithCredentialsFile(credentialsFile string) Option

WithCredentialsFile sets the path to a service account key file for Vertex AI authentication. If both WithCredentialsFile and WithCredentialsJSON are provided, JSON credentials take precedence. The file should contain a valid Google Cloud service account key in JSON format.

func WithCredentialsJSON added in v0.0.48

func WithCredentialsJSON(credentialsJSON []byte) Option

WithCredentialsJSON sets the service account key JSON bytes for Vertex AI authentication. If both WithCredentialsFile and WithCredentialsJSON are provided, JSON credentials take precedence. The bytes should contain a valid Google Cloud service account key in JSON format.

func WithDynamicThinking

func WithDynamicThinking() Option

WithDynamicThinking creates a client Option to enable dynamic thinking (no fixed budget)

func WithLocation added in v0.0.43

func WithLocation(location string) Option

WithLocation sets the GCP location for Vertex AI backend

func WithLogger

func WithLogger(logger logging.Logger) Option

WithLogger sets the logger for the Gemini client

func WithMaxOutputTokens added in v0.1.6

func WithMaxOutputTokens(maxTokens int32) Option

WithMaxOutputTokens sets the maximum number of output tokens to generate. This limits the length of the model's response.

func WithModel

func WithModel(model string) Option

WithModel sets the model for the Gemini client

func WithProjectID added in v0.0.43

func WithProjectID(projectID string) Option

WithProjectID sets the GCP project ID for Vertex AI backend

func WithRetry

func WithRetry(opts ...retry.Option) Option

WithRetry configures retry policy for the client

func WithThinking

func WithThinking(enabled bool) Option

WithThinking creates a client Option to enable/disable thinking

func WithThinkingBudget

func WithThinkingBudget(budget int32) Option

WithThinkingBudget creates a client Option to set thinking token budget

func WithThinkingConfig

func WithThinkingConfig(config ThinkingConfig) Option

WithThinkingConfig creates a client Option to set complete thinking configuration

func WithThoughtSignatures

func WithThoughtSignatures(signatures [][]byte) Option

WithThoughtSignatures creates a client Option to set thought signatures for multi-turn context

type ReasoningMode

// ReasoningMode defines the reasoning approach requested from the model.
type ReasoningMode string

ReasoningMode defines the reasoning approach for the model

// Reasoning modes accepted by WithReasoning: "none" for direct answers,
// "minimal" for brief explanations, "comprehensive" for detailed
// step-by-step reasoning.
const (
	ReasoningModeNone          ReasoningMode = "none"
	ReasoningModeMinimal       ReasoningMode = "minimal"
	ReasoningModeComprehensive ReasoningMode = "comprehensive"
)

type SafetySetting

// SafetySetting pairs a harm category with the threshold at which content
// in that category is blocked. See DefaultSafetySettings for sane defaults.
type SafetySetting struct {
	Category  HarmCategory    `json:"category"`  // Harm category being configured
	Threshold SafetyThreshold `json:"threshold"` // Blocking threshold for that category
}

SafetySetting represents a safety setting for content filtering

func DefaultSafetySettings

func DefaultSafetySettings() []SafetySetting

DefaultSafetySettings returns default safety settings

type SafetyThreshold

// SafetyThreshold represents the safety filtering threshold for a category.
type SafetyThreshold string

SafetyThreshold represents the safety filtering threshold

// Safety thresholds, from most restrictive (BlockLowAndAbove) to least
// (BlockNone). The string values mirror the API's enum names and must not
// be changed.
const (
	SafetyThresholdUnspecified         SafetyThreshold = "HARM_BLOCK_THRESHOLD_UNSPECIFIED"
	SafetyThresholdBlockLowAndAbove    SafetyThreshold = "BLOCK_LOW_AND_ABOVE"
	SafetyThresholdBlockMediumAndAbove SafetyThreshold = "BLOCK_MEDIUM_AND_ABOVE"
	SafetyThresholdBlockOnlyHigh       SafetyThreshold = "BLOCK_ONLY_HIGH"
	SafetyThresholdBlockNone           SafetyThreshold = "BLOCK_NONE"
)

type ThinkingConfig

// ThinkingConfig represents thinking/reasoning configuration for Gemini
// models. Use DefaultThinkingConfig for defaults and ValidateThinkingBudget
// to check a budget against a model's limits.
type ThinkingConfig struct {
	// Whether to include thinking content in responses
	IncludeThoughts bool
	// Maximum tokens allocated for thinking (nil for dynamic thinking,
	// i.e. the model chooses its own budget)
	ThinkingBudget *int32
	// Thought signatures for context preservation across multi-turn conversations
	ThoughtSignatures [][]byte
}

ThinkingConfig represents thinking/reasoning configuration for Gemini models

func DefaultThinkingConfig

func DefaultThinkingConfig() ThinkingConfig

DefaultThinkingConfig returns default thinking configuration

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL