vision

package
v0.4.2 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: May 25, 2026 License: MIT Imports: 9 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func AnalyzeImageTool

func AnalyzeImageTool(ctx context.Context, args map[string]interface{}) (string, error)

AnalyzeImageTool is the tool function for image analysis

func GenerateImageTool

func GenerateImageTool(ctx context.Context, args map[string]interface{}) (string, error)

GenerateImageTool is the tool function for image generation

Types

type AnalysisResult

// AnalysisResult contains image analysis results. A pointer to it is what the
// AnalyzeImage methods of Provider and Manager return.
//
// None of the JSON tags use omitempty, so every key is always present when the
// value is marshaled with encoding/json; nil slices and a nil Metadata map
// encode as null.
type AnalysisResult struct {
	// Description is the textual description of the image.
	Description string                 `json:"description"`
	// Tags is a list of string tags for the image.
	Tags        []string               `json:"tags"`
	// Objects lists the detected objects (label, confidence, bounding box).
	Objects     []DetectedObject       `json:"objects"`
	Text        string                 `json:"text"` // OCR text
	// Faces lists the detected faces.
	Faces       []DetectedFace         `json:"faces"`
	// Colors lists color entries; ColorInfo is documented as a dominant color.
	Colors      []ColorInfo            `json:"colors"`
	// Confidence is a score for the result as a whole.
	// NOTE(review): the scale (e.g. 0-1 vs 0-100) is not specified here — confirm.
	Confidence  float64                `json:"confidence"`
	// Metadata is a free-form map; its keys and value types are not specified
	// in this view.
	Metadata    map[string]interface{} `json:"metadata"`
}

AnalysisResult contains image analysis results

type AnthropicVisionProvider

// AnthropicVisionProvider is a field-less type. Its pointer receiver declares
// AnalyzeImage, GenerateImage and Name with the same signatures the Provider
// interface requires, so *AnthropicVisionProvider can be used as a Provider.
// NOTE(review): which backend it calls, and whether both operations are
// actually supported, is not visible from the declarations — confirm.
type AnthropicVisionProvider struct{}

func (*AnthropicVisionProvider) AnalyzeImage

func (p *AnthropicVisionProvider) AnalyzeImage(ctx context.Context, imageURL string, prompt string) (*AnalysisResult, error)

func (*AnthropicVisionProvider) GenerateImage

func (p *AnthropicVisionProvider) GenerateImage(ctx context.Context, prompt string, options *GenerateOptions) (*GenerationResult, error)

func (*AnthropicVisionProvider) Name

func (p *AnthropicVisionProvider) Name() string

type Box

// Box represents a bounding box. It is used as the BoundingBox field of
// DetectedObject and DetectedFace.
//
// NOTE(review): the coordinate origin and the units (pixels vs. normalized
// fractions) are not specified here — confirm against the providers.
type Box struct {
	// X and Y locate the box; presumably its top-left corner — TODO confirm.
	X      float64 `json:"x"`
	Y      float64 `json:"y"`
	// Width and Height give the extent of the box.
	Width  float64 `json:"width"`
	Height float64 `json:"height"`
}

Box represents a bounding box

type ColorInfo

// ColorInfo represents a dominant color. It is the element type of
// AnalysisResult.Colors.
type ColorInfo struct {
	// Hex is the color as a hex string.
	// NOTE(review): exact format (leading "#", case) is not specified — confirm.
	Hex        string  `json:"hex"`
	// RGB holds three integer components; presumably red, green, blue in that
	// order — TODO confirm.
	RGB        [3]int  `json:"rgb"`
	// Percentage is the share of the image attributed to this color.
	// NOTE(review): scale (0-1 vs 0-100) is not specified — confirm.
	Percentage float64 `json:"percentage"`
}

ColorInfo represents a dominant color.

type Config

// Config holds vision configuration. NewManager takes a *Config and
// DefaultConfig returns one. The fields carry no struct tags.
type Config struct {
	// Enabled presumably switches the vision feature on or off — TODO confirm
	// where it is checked.
	Enabled       bool
	DefaultMode   string // "understand" or "generate"
	// ImageCacheDir is a directory path used for caching images.
	// NOTE(review): whether it is created on demand is not visible here.
	ImageCacheDir string
	MaxImageSize  int64 // bytes
	// Timeout is a duration limit.
	// NOTE(review): which operations it bounds is not visible here — confirm.
	Timeout       time.Duration
}

Config holds vision configuration

func DefaultConfig

func DefaultConfig() *Config

DefaultConfig returns default vision configuration

type DetectedFace

// DetectedFace represents a detected face. It is the element type of
// AnalysisResult.Faces.
//
// Age, Gender and Emotion are tagged omitempty, so they are left out of the
// JSON output when they hold their zero value (0 or ""); an Age of 0 is
// therefore indistinguishable from "not provided" after marshaling.
type DetectedFace struct {
	// BoundingBox locates the face in the image.
	BoundingBox Box     `json:"bounding_box"`
	Age         int     `json:"age,omitempty"`
	Gender      string  `json:"gender,omitempty"`
	Emotion     string  `json:"emotion,omitempty"`
	// Confidence is the detection score.
	// NOTE(review): scale is not specified here — confirm.
	Confidence  float64 `json:"confidence"`
}

DetectedFace represents a detected face

type DetectedObject

// DetectedObject represents a detected object. It is the element type of
// AnalysisResult.Objects. All three JSON keys are always emitted (no omitempty).
type DetectedObject struct {
	// Label is the name given to the object.
	Label       string  `json:"label"`
	// Confidence is the detection score.
	// NOTE(review): scale is not specified here — confirm.
	Confidence  float64 `json:"confidence"`
	// BoundingBox locates the object in the image.
	BoundingBox Box     `json:"bounding_box"`
}

DetectedObject represents a detected object

type GenerateOptions

// GenerateOptions contains image generation options. A pointer to it is passed
// to the GenerateImage methods of Provider and Manager.
//
// Width and Height are always emitted in JSON; every other field is tagged
// omitempty and is dropped when it holds its zero value. In particular a Seed
// of 0 and a NumImages of 0 are not serialized.
// NOTE(review): whether a nil *GenerateOptions is accepted, and what defaults
// apply to zero-valued fields, is not visible here — confirm.
type GenerateOptions struct {
	// Width and Height are the requested image dimensions (presumably pixels —
	// TODO confirm).
	Width          int    `json:"width"`
	Height         int    `json:"height"`
	// Model names the generation model to use.
	Model          string `json:"model,omitempty"`
	Quality        string `json:"quality,omitempty"` // standard, hd
	Style          string `json:"style,omitempty"`   // natural, vivid
	// Seed is the generation seed.
	Seed           int64  `json:"seed,omitempty"`
	// NumImages is the number of images requested.
	NumImages      int    `json:"num_images,omitempty"`
	ReferenceImage string `json:"reference_image,omitempty"` // for img2img
	MaskImage      string `json:"mask_image,omitempty"`      // for inpainting
	// NegativePrompt is a prompt describing what to avoid.
	// NOTE(review): provider support is not visible here.
	NegativePrompt string `json:"negative_prompt,omitempty"`
}

GenerateOptions contains image generation options

type GenerationResult

// GenerationResult contains generation results. A pointer to it is what the
// GenerateImage methods of Provider and Manager return.
type GenerationResult struct {
	// Images holds one ImageInfo per generated image.
	Images         []ImageInfo `json:"images"`
	// Prompt is the prompt associated with this result.
	Prompt         string      `json:"prompt"`
	// Provider and Model identify what produced the images.
	Provider       string      `json:"provider"`
	Model          string      `json:"model"`
	// ProcessingTime is serialized as "processing_time_seconds", i.e. the
	// value is expressed in seconds.
	ProcessingTime float64     `json:"processing_time_seconds"`
	// Seed is omitted from JSON when it is 0 (omitempty).
	Seed           int64       `json:"seed,omitempty"`
}

GenerationResult contains generation results

type GoogleVisionProvider

// GoogleVisionProvider is a field-less type. Its pointer receiver declares
// AnalyzeImage, GenerateImage and Name with the same signatures the Provider
// interface requires, so *GoogleVisionProvider can be used as a Provider.
// NOTE(review): which backend it calls, and whether both operations are
// actually supported, is not visible from the declarations — confirm.
type GoogleVisionProvider struct{}

func (*GoogleVisionProvider) AnalyzeImage

func (p *GoogleVisionProvider) AnalyzeImage(ctx context.Context, imageURL string, prompt string) (*AnalysisResult, error)

func (*GoogleVisionProvider) GenerateImage

func (p *GoogleVisionProvider) GenerateImage(ctx context.Context, prompt string, options *GenerateOptions) (*GenerationResult, error)

func (*GoogleVisionProvider) Name

func (p *GoogleVisionProvider) Name() string

type ImageInfo

// ImageInfo contains generated image information. It is the element type of
// GenerationResult.Images.
//
// URL, Base64, LocalPath and RevisedPrompt are tagged omitempty and are left
// out of the JSON output when empty; the remaining keys are always emitted.
type ImageInfo struct {
	// URL, Base64 and LocalPath are presumably alternative ways of referring
	// to the image data — TODO confirm which ones each provider fills in.
	URL           string `json:"url,omitempty"`
	Base64        string `json:"base64,omitempty"`
	LocalPath     string `json:"local_path,omitempty"`
	// MimeType is the media type of the image.
	MimeType      string `json:"mime_type"`
	// Width and Height are the image dimensions (presumably pixels — TODO confirm).
	Width         int    `json:"width"`
	Height        int    `json:"height"`
	// FileSize is the size of the image.
	// NOTE(review): presumably bytes — confirm.
	FileSize      int64  `json:"file_size"`
	// RevisedPrompt is a prompt as rewritten during generation, when present.
	// NOTE(review): which providers populate it is not visible here.
	RevisedPrompt string `json:"revised_prompt,omitempty"`
}

ImageInfo contains generated image information

type LocalVisionProvider

// LocalVisionProvider is a field-less type. Its pointer receiver declares
// AnalyzeImage, GenerateImage and Name with the same signatures the Provider
// interface requires, so *LocalVisionProvider can be used as a Provider.
// NOTE(review): what "local" processing it performs, and whether both
// operations are actually supported, is not visible from the declarations.
type LocalVisionProvider struct{}

func (*LocalVisionProvider) AnalyzeImage

func (p *LocalVisionProvider) AnalyzeImage(ctx context.Context, imageURL string, prompt string) (*AnalysisResult, error)

func (*LocalVisionProvider) GenerateImage

func (p *LocalVisionProvider) GenerateImage(ctx context.Context, prompt string, options *GenerateOptions) (*GenerationResult, error)

func (*LocalVisionProvider) Name

func (p *LocalVisionProvider) Name() string

type Manager

// Manager handles image understanding and generation. All of its fields are
// unexported; obtain one with NewManager. Its methods register and look up
// Provider values by name (RegisterProvider, GetProvider, ListProviders) and
// take a provider name when analyzing or generating (AnalyzeImage,
// GenerateImage).
// NOTE(review): whether a Manager is safe for concurrent use is not visible
// here — confirm before sharing one across goroutines.
type Manager struct {
	// contains filtered or unexported fields
}

Manager handles image understanding and generation

func NewManager

func NewManager(config *Config) *Manager

NewManager creates a new vision manager

func (*Manager) AnalyzeImage

func (m *Manager) AnalyzeImage(ctx context.Context, imagePath string, prompt string, providerName string) (*AnalysisResult, error)

AnalyzeImage analyzes an image using the specified provider.

func (*Manager) ExtractImagesFromURL

func (m *Manager) ExtractImagesFromURL(pageURL string) ([]string, error)

ExtractImagesFromURL extracts image URLs from a webpage

func (*Manager) GenerateImage

func (m *Manager) GenerateImage(ctx context.Context, prompt string, options *GenerateOptions, providerName string) (*GenerationResult, error)

GenerateImage generates an image using the specified provider.

func (*Manager) GetProvider

func (m *Manager) GetProvider(name string) Provider

GetProvider returns a provider by name

func (*Manager) ListProviders

func (m *Manager) ListProviders() []string

ListProviders returns all registered providers

func (*Manager) RegisterProvider

func (m *Manager) RegisterProvider(name string, provider Provider)

RegisterProvider registers a custom vision provider

type OpenAIVisionProvider

// OpenAIVisionProvider is a field-less type. Its pointer receiver declares
// AnalyzeImage, GenerateImage and Name with the same signatures the Provider
// interface requires, so *OpenAIVisionProvider can be used as a Provider.
// NOTE(review): which backend it calls, and whether both operations are
// actually supported, is not visible from the declarations — confirm.
type OpenAIVisionProvider struct{}

func (*OpenAIVisionProvider) AnalyzeImage

func (p *OpenAIVisionProvider) AnalyzeImage(ctx context.Context, imageURL string, prompt string) (*AnalysisResult, error)

func (*OpenAIVisionProvider) GenerateImage

func (p *OpenAIVisionProvider) GenerateImage(ctx context.Context, prompt string, options *GenerateOptions) (*GenerationResult, error)

func (*OpenAIVisionProvider) Name

func (p *OpenAIVisionProvider) Name() string

type Provider

// Provider defines the interface for vision providers. Values of this type are
// what Manager.RegisterProvider accepts and Manager.GetProvider returns.
type Provider interface {
	// AnalyzeImage analyzes the image referred to by imageURL, guided by
	// prompt, and returns the analysis or an error.
	// NOTE(review): accepted imageURL forms (http URL, data URI, file path)
	// are not specified here — confirm per provider.
	AnalyzeImage(ctx context.Context, imageURL string, prompt string) (*AnalysisResult, error)

	// GenerateImage generates one or more images from prompt using options
	// and returns the result or an error.
	// NOTE(review): whether options may be nil is not specified here.
	GenerateImage(ctx context.Context, prompt string, options *GenerateOptions) (*GenerationResult, error)

	// Name returns the provider's name.
	Name() string
}

Provider defines the interface for vision providers.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL