gemini

package module
v0.0.0-...-20bcb34 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 18, 2025 License: MIT Imports: 18 Imported by: 0

README

Gemini

A Go client library for Google's Gemini AI models with advanced rate limiting, model management, and OpenTelemetry integration.

Features

  • Multi-model Support: Manage multiple Gemini models with different configurations
  • Advanced Rate Limiting: Built-in rate limiting with multiple tiers and backoff strategies
  • OpenTelemetry Integration: Full observability with tracing and metrics
  • Context-aware: Proper context propagation throughout the API
  • Type-safe Model Management: Strongly typed model names and configurations

Installation

go get github.com/agentflare-ai/agentml/gemini

Quick Start

package main

import (
    "context"
    "log"
    
    "github.com/agentflare-ai/agentml/gemini"
    "google.golang.org/genai"
)

func main() {
    ctx := context.Background()

    // Define your models, keyed by the name used in API calls.
    models := map[gemini.ModelName]*gemini.Model{
        "gemini-pro": {
            Name: "gemini-pro",
            // Configure model parameters
        },
    }

    // Create the rate-limited client.
    client, err := gemini.NewClient(ctx, models, &genai.ClientConfig{
        // Your API configuration
    })
    if err != nil {
        log.Fatal(err)
    }
    defer client.Close()

    // Generate content with the registered model.
    response, err := client.GenerateContent(ctx, "gemini-pro",
        []*genai.Content{{
            Parts: []genai.Part{genai.Text("Hello, world!")},
        }}, nil)
    if err != nil {
        log.Fatal(err)
    }

    // Use response... The blank assignment keeps the example compiling
    // (an unused local variable is a compile error in Go) until you do.
    _ = response
}

Rate Limiting

The client includes sophisticated rate limiting with multiple tiers:

  • Tier-based Limits: Different rate limits for different usage patterns
  • Adaptive Backoff: Intelligent backoff strategies when limits are hit
  • Request Prioritization: Priority queuing for different request types

Model Management

Models are managed through a strongly-typed system:

type Model struct {
    Name        ModelName
    RateLimit   *RateLimit
    Config      *ModelConfig
    // Additional model-specific settings
    // NOTE(review): this README sketch does not match the exported Model
    // type documented below (Name, GenerateRateLimiter,
    // TokenCountRateLimiter) — confirm which shape is current and align
    // the README with the actual declaration.
}

Observability

Full OpenTelemetry support for monitoring and debugging:

  • Request/response tracing
  • Rate limiting metrics
  • Error tracking
  • Performance monitoring

License

This project is part of the agentml ecosystem.

Documentation

Index

Examples

Constants

View Source
// GeminiNamespaceURI is the XML namespace URI used for Gemini executable elements.
const GeminiNamespaceURI = "github.com/agentflare-ai/agentml/gemini"

GeminiNamespaceURI is the XML namespace URI used for Gemini executable elements.

Variables

View Source
var (
	// Tier1RateLimits defines the per-model limits for Tier 1 usage.
	// RPM = requests per minute, RPD = requests per day,
	// TPM = tokens per minute; rate.Inf means the dimension is unlimited.
	Tier1RateLimits = map[ModelName]RateLimiterOptions{
		Pro: {
			RPM: 150,
			RPD: 2_000_000,
			TPM: 10_000,
		},
		Flash: {
			RPM: 1_000,
			RPD: 1_000_000,
			TPM: 10_000,
		},
		FlashLite: {
			RPM: 4_000,
			RPD: 4_000_000,
			TPM: rate.Inf,
		},
		Embedding001: {
			RPM: 3_000,
			RPD: 10_000_000,
			TPM: rate.Inf,
		},
	}
	// Tier2RateLimits defines the per-model limits for Tier 2 usage.
	Tier2RateLimits = map[ModelName]RateLimiterOptions{
		Pro: {
			RPM: 1_000,
			RPD: 5_000_000,
			TPM: 50_000,
		},
		Flash: {
			RPM: 2_000,
			RPD: 3_000_000,
			TPM: 100_000,
		},
		FlashLite: {
			RPM: 10_000,
			RPD: 10_000_000,
			TPM: rate.Inf,
		},
		Embedding001: {
			RPM: 3_000,
			RPD: 10_000_000,
			TPM: rate.Inf,
		},
	}
	// Tier3RateLimits defines the per-model limits for Tier 3 usage.
	Tier3RateLimits = map[ModelName]RateLimiterOptions{
		Pro: {
			RPM: 2_000,
			RPD: rate.Inf,
			TPM: 8_000_000,
		},
		Flash: {
			RPM: 10_000,
			RPD: rate.Inf,
			TPM: 8_000_000,
		},
		FlashLite: {
			RPM: 30_000,
			RPD: rate.Inf,
			TPM: 30_000_000,
		},
		Embedding001: {
			RPM: 3_000,
			RPD: 10_000_000,
			TPM: rate.Inf,
		},
	}

	// TokenCountRateLimiter is the limiter passed to every Model for
	// token-count traffic; it is shared across all tiers and models.
	TokenCountRateLimiter = NewRateLimiter(RateLimiterOptions{
		RPM: 3_000,
		RPD: 10_000_000,
		TPM: rate.Inf,
	})

	// TierNModels are ready-to-use Model sets, one per usage tier,
	// derived from the corresponding TierNRateLimits maps.
	Tier1Models = newTierModels(Tier1RateLimits)
	Tier2Models = newTierModels(Tier2RateLimits)
	Tier3Models = newTierModels(Tier3RateLimits)
)

// newTierModels builds one Model per entry in limits, giving each model its
// own generation rate limiter while sharing the package-wide
// TokenCountRateLimiter for token counting.
func newTierModels(limits map[ModelName]RateLimiterOptions) map[ModelName]*Model {
	models := make(map[ModelName]*Model, len(limits))
	for name, opts := range limits {
		models[name] = NewModel(name, NewRateLimiter(opts), TokenCountRateLimiter)
	}
	return models
}

Functions

func IsRateLimitError

func IsRateLimitError(err error) bool

IsRateLimitError checks if an error is a rate limiting error.

func Loader

func Loader(deps *Deps) agentml.NamespaceLoader

Loader returns a NamespaceLoader for the Gemini namespace. It closes over DI deps (Gemini client) and the interpreter.

func NewGenerate

func NewGenerate(ctx context.Context, element xmldom.Element) (agentml.Executor, error)

NewGenerate creates a new Generate executable from an XML element. It constructs executable content from the provided xmldom.Element by extracting attributes needed for generation.

The function validates that required attributes are present and returns an error if the element is malformed or missing required data.

Parameters:

  • ctx: Context for the operation (currently unused but follows interface)
  • element: The XML element containing the generation configuration

Returns:

  • agentml.Executor: A new Generate instance implementing the interface
  • error: An error if the element is invalid or missing required attributes

Types

type Client

// Client is a multi-model Gemini API client with built-in rate limiting.
// Construct one with NewClient and release it with Close.
type Client struct {
	// contains filtered or unexported fields
}

func NewClient

func NewClient(ctx context.Context, models map[ModelName]*Model, config *genai.ClientConfig) (*Client, error)
Example

ExampleNewClient demonstrates how to create a client with API key from environment

// ExampleNewClient: build a client from an environment-sourced API key and
// the predefined Tier 1 model set.
ctx := context.Background()

// Load API key from environment
config := &genai.ClientConfig{
	APIKey: loadAPIKeyFromEnv(),
}

// Use predefined models with rate limiting
models := Tier1Models

client, err := NewClient(ctx, models, config)
if err != nil {
	// Handle error
	return
}

// Use client in your application
_ = client

func (*Client) CountTokens

func (c *Client) CountTokens(ctx context.Context, model ModelName, contents []*genai.Content, config *genai.CountTokensConfig) (*genai.CountTokensResponse, error)

func (*Client) EmbedContent

func (c *Client) EmbedContent(ctx context.Context, model ModelName, contents []*genai.Content, config *genai.EmbedContentConfig) (*genai.EmbedContentResponse, error)

func (*Client) GenerateContent

func (c *Client) GenerateContent(ctx context.Context, model ModelName, contents []*genai.Content, config *genai.GenerateContentConfig) (*genai.GenerateContentResponse, error)

func (*Client) GenerateWithAutoSelection

func (c *Client) GenerateWithAutoSelection(ctx context.Context, prompt string, config *genai.GenerateContentConfig) (*genai.GenerateContentResponse, *ModelSelectionResult, error)

GenerateWithAutoSelection automatically selects the best model based on prompt complexity and provides fallback to other models if rate limits are encountered.

func (*Client) SetSelectionStrategy

func (c *Client) SetSelectionStrategy(strategy *ModelSelectionStrategy)

SetSelectionStrategy allows customization of the model selection strategy.

func (*Client) StreamGenerate

func (c *Client) StreamGenerate(ctx context.Context, model ModelName, contents []*genai.Content, config *genai.GenerateContentConfig, respChan chan<- *genai.GenerateContentResponse) error

type ClientOptions

// ClientOptions is a type alias for genai.ClientConfig, so callers can
// name the configuration type without importing genai directly.
type ClientOptions = genai.ClientConfig

type Deps

// Deps holds the dependencies injected into Gemini executables, aligning
// with the DI style used across the project (e.g., the memory package).
type Deps struct {
	// Client is the Gemini client used to perform generation calls.
	Client *Client
}

Deps holds dependencies for Gemini executables. Aligns with the DI style used across the project (e.g., memory package).

type Generate

// Generate is the Gemini AI generation executable content element for SCXML.
// It maps XML attributes (model, prompt, location, ...) onto fields and,
// when executed, generates content with Gemini and stores the result in the
// data model.
type Generate struct {
	xmldom.Element

	// Model specifies the Gemini AI model to use for generation.
	// Common values include "gemini-2.5-flash", "gemini-2.5-pro", etc.
	// (see the ModelName constants).
	Model string `xml:"model,attr"`

	// ModelExpr is presumably a data-model expression evaluated at run
	// time to produce the model name, as an alternative to the static
	// Model attribute — TODO(review): confirm against Execute.
	ModelExpr string `xml:"modelexpr,attr"`

	// Prompt contains the prompt or template for AI generation.
	// This can be a static string or contain data model expressions.
	Prompt string `xml:"prompt,attr"`

	// Location specifies where in the data model to store the generated result.
	// This should be a valid data model location expression.
	Location string `xml:"location,attr"`

	// Stream indicates whether to use streaming generation for real-time responses.
	// When true, responses are delivered progressively as they are generated.
	Stream bool `xml:"stream,attr"`

	// OnChunk specifies the data model location for handling streaming chunks.
	// Only used when Stream is true. Each chunk is assigned to this location.
	OnChunk string `xml:"onchunk,attr"`

	// AutoSelect enables automatic model selection based on prompt complexity.
	// When true, the model attribute becomes optional and is selected automatically.
	AutoSelect bool `xml:"autoselect,attr"`

	// ComplexityHint provides a hint about task complexity for model selection.
	// Valid values: "simple", "moderate", "complex". Optional.
	ComplexityHint string `xml:"complexity,attr"`
	// contains filtered or unexported fields
}

Generate represents a Gemini AI generation executable content element for SCXML. It implements the scxml.Executable interface to provide AI generation capabilities within SCXML state machines using Google's Gemini AI models.

The Generate struct maps to XML elements with the following attributes:

  • model: Specifies the Gemini model to use (e.g., "gemini-2.5-flash", "gemini-2.5-pro")
  • prompt: The prompt or template for AI generation
  • location: Data model location where the generated result should be stored

Example XML usage:

<gemini:generate model="gemini-2.5-flash"
                 prompt="Generate a greeting message"
                 location="greeting" />

func (*Generate) Execute

func (g *Generate) Execute(ctx context.Context, interpreter agentml.Interpreter) error

Execute implements the scxml.Executable interface for Generate. It performs AI generation using the specified Gemini model and prompt, then stores the result in the specified data model location.

The execution process:

  1. Validates that all required attributes are present
  2. Evaluates the prompt expression using the data model (if needed)
  3. Calls the Gemini AI service to generate content
  4. Stores the generated result in the specified location

Returns an error if generation fails or if required attributes are missing.

func (*Generate) SetClient

func (g *Generate) SetClient(client *Client)

SetClient sets the Gemini client for this Generate instance. This enables dependency injection for testing and configuration.

type Model

// Model pairs a Gemini model name with its rate limiters: one governing
// generation requests and one governing token-count requests.
type Model struct {
	Name                  ModelName
	GenerateRateLimiter   *RateLimiter
	TokenCountRateLimiter *RateLimiter
}

func NewModel

func NewModel(name ModelName, generateRateLimit *RateLimiter, tokenCountRateLimit *RateLimiter) *Model

type ModelName

// ModelName identifies a Gemini model by its API model name string.
type ModelName string
// Supported Gemini model names. Note that Ultra shares Pro's model string.
const (
	FlashLite    ModelName = "gemini-2.5-flash-lite"
	Pro          ModelName = "gemini-2.5-pro"
	Flash        ModelName = "gemini-2.5-flash"
	Ultra        ModelName = "gemini-2.5-pro" // compatibility alias used by coder package
	Embedding001 ModelName = "gemini-embedding-001"
)

type ModelSelectionResult

// ModelSelectionResult contains the result of automatic model selection,
// including fallback options and the reasoning behind the choice.
type ModelSelectionResult struct {
	// PrimaryModel is the recommended model for the task
	PrimaryModel ModelName
	// FallbackModels provides alternative models if the primary fails
	FallbackModels []ModelName
	// Complexity contains the analyzed task complexity
	Complexity TaskComplexity
	// SelectionReason explains why this model was chosen
	SelectionReason string
}

ModelSelectionResult contains the result of model selection including fallback options.

type ModelSelectionStrategy

// ModelSelectionStrategy defines how models are selected automatically
// from analyzed task complexity, with a fallback chain for rate limits.
type ModelSelectionStrategy struct {
	// PreferredModels maps complexity levels to preferred model names
	PreferredModels map[string]ModelName
	// FallbackChain defines the fallback order when preferred models are rate-limited
	FallbackChain []ModelName
	// MaxFallbackAttempts limits the number of fallback attempts
	MaxFallbackAttempts int
}

ModelSelectionStrategy defines the strategy for automatically selecting models based on task complexity.

func NewModelSelectionStrategy

func NewModelSelectionStrategy() *ModelSelectionStrategy

NewModelSelectionStrategy creates a new model selection strategy with default mappings.

func (*ModelSelectionStrategy) GenerateWithFallback

GenerateWithFallback attempts to generate content with automatic model fallback on rate limits.

func (*ModelSelectionStrategy) SelectModel

SelectModel selects the best model for a given task prompt. It analyzes complexity and returns the preferred model with fallback options.

type RateLimitError

// RateLimitError represents a rate limiting error that can trigger model
// fallback. Underlying preserves the provider error; RetryAfter carries
// the wait duration before a retry (zero when none was supplied).
type RateLimitError struct {
	Model      ModelName
	RetryAfter time.Duration
	Underlying error
}

// Unwrap returns the underlying error so callers can inspect the wrapped
// cause with errors.Is and errors.As.
func (e *RateLimitError) Unwrap() error { return e.Underlying }

RateLimitError represents a rate limiting error that can trigger model fallback.

func (*RateLimitError) Error

func (e *RateLimitError) Error() string

type RateLimiter

// RateLimiter enforces the limits described by RateLimiterOptions.
// Use Wait to block until a request (and optional token budget) is allowed.
type RateLimiter struct {
	// contains filtered or unexported fields
}

func NewRateLimiter

func NewRateLimiter(options RateLimiterOptions) *RateLimiter

func (*RateLimiter) Wait

func (r *RateLimiter) Wait(ctx context.Context, maybeTokens ...int) error

type RateLimiterOptions

// RateLimiterOptions configures a RateLimiter. Use rate.Inf for any
// dimension that should be unlimited.
type RateLimiterOptions struct {
	RPM rate.Limit // Requests per minute
	RPD rate.Limit // Requests per day
	TPM rate.Limit // Tokens per minute

	// Optional bursts to override defaults. If zero or negative, defaults are used:
	//   BurstRPM: ceil(RPM)
	//   BurstRPD: ceil(RPD)
	//   BurstTPM: ceil(TPM)
	BurstRPM int
	BurstRPD int
	BurstTPM int
}

type TaskComplexity

// TaskComplexity represents the complexity analysis of a task prompt,
// as produced by AnalyzeComplexity.
type TaskComplexity struct {
	// Level indicates the complexity level: "simple", "moderate", or "complex"
	Level string
	// Reason explains why this complexity level was chosen
	Reason string
	// Confidence indicates the confidence score (0.0 to 1.0)
	Confidence float64
}

TaskComplexity represents the complexity analysis of a task prompt.

func AnalyzeComplexity

func AnalyzeComplexity(prompt string) TaskComplexity

AnalyzeComplexity analyzes a prompt to determine task complexity using simple heuristics. It categorizes tasks as simple, moderate, or complex based on keyword matching and prompt length.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL