inference

package
v0.3.4
Warning

This package is not in the latest version of its module.

Published: Feb 11, 2026 License: MIT Imports: 14 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type ChatMessage

type ChatMessage struct {
	Role    string `json:"role"`
	Content string `json:"content"`
}

ChatMessage represents a chat message

type GenerateRequest

type GenerateRequest struct {
	Model       string
	Prompt      string
	MaxTokens   int
	Temperature float64
	Stop        []string
}

GenerateRequest represents an inference request

type GenerateResponse

type GenerateResponse struct {
	Text         string
	FinishReason string
	Usage        Usage
}

GenerateResponse represents an inference response

type HuggingFaceService

type HuggingFaceService struct {
	// contains filtered or unexported fields
}

HuggingFaceService implements InferenceService for HuggingFace Inference API

func NewHuggingFaceService

func NewHuggingFaceService(endpoint, token string) *HuggingFaceService

NewHuggingFaceService creates a new HuggingFace inference service. Uses the HF_TOKEN environment variable if token is empty
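
A minimal sketch of constructing the service and running a single generation. The import path, endpoint URL, and model name below are placeholders, not values defined by this package:

package main

import (
	"context"
	"fmt"
	"log"

	"example.com/yourmodule/inference" // hypothetical import path
)

func main() {
	// Empty token: the service falls back to the HF_TOKEN environment variable.
	svc := inference.NewHuggingFaceService("https://api-inference.huggingface.co", "")
	if !svc.Available() {
		log.Fatal("HuggingFace API is not reachable")
	}

	resp, err := svc.Generate(context.Background(), inference.GenerateRequest{
		Model:     "mistralai/Mistral-7B-Instruct-v0.2", // placeholder model name
		Prompt:    "Explain goroutines in one sentence.",
		MaxTokens: 128,
	})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(resp.Text)
}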

func (*HuggingFaceService) Available

func (h *HuggingFaceService) Available() bool

Available checks if HuggingFace API is available

func (*HuggingFaceService) Generate

func (h *HuggingFaceService) Generate(ctx context.Context, req GenerateRequest) (*GenerateResponse, error)

Generate performs text generation using HuggingFace Inference API

func (*HuggingFaceService) TextGenerationInference

func (h *HuggingFaceService) TextGenerationInference(ctx context.Context, endpoint string, req GenerateRequest) (*GenerateResponse, error)

TextGenerationInference sends request to a dedicated TGI endpoint
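
A hedged sketch of targeting a dedicated TGI deployment instead of the shared Inference API; both URLs here are placeholders:

package main

import (
	"context"
	"fmt"
	"log"

	"example.com/yourmodule/inference" // hypothetical import path
)

func main() {
	svc := inference.NewHuggingFaceService("https://api-inference.huggingface.co", "") // token read from HF_TOKEN

	resp, err := svc.TextGenerationInference(context.Background(),
		"https://my-tgi-deployment.example.com", // placeholder dedicated TGI endpoint
		inference.GenerateRequest{
			Prompt:    "Explain backpressure in streaming systems.",
			MaxTokens: 200,
		})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(resp.Text)
}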

type HybridInference

type HybridInference struct {
	// contains filtered or unexported fields
}

HybridInference provides inference with fallback strategy

func NewHybridInference

func NewHybridInference(local, cloud InferenceService) *HybridInference

NewHybridInference creates a new hybrid inference service

func (*HybridInference) Available

func (h *HybridInference) Available() bool

Available returns true if any inference service is available

func (*HybridInference) Generate

func (h *HybridInference) Generate(ctx context.Context, req GenerateRequest) (*GenerateResponse, error)

Generate attempts local inference first, falls back to cloud

func (*HybridInference) SetPreferLocal

func (h *HybridInference) SetPreferLocal(prefer bool)

SetPreferLocal sets whether to prefer local inference
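
A sketch of the fallback setup, assuming Ollama as the local backend and HuggingFace as the cloud backend; the URLs and model name are placeholders:

package main

import (
	"context"
	"fmt"
	"log"

	"example.com/yourmodule/inference" // hypothetical import path
)

func main() {
	local, err := inference.NewOllamaService("http://localhost:11434") // assumed default Ollama address
	if err != nil {
		log.Fatal(err)
	}
	cloud := inference.NewHuggingFaceService("https://api-inference.huggingface.co", "")

	hybrid := inference.NewHybridInference(local, cloud)
	hybrid.SetPreferLocal(true) // try Ollama first, fall back to the cloud service

	resp, err := hybrid.Generate(context.Background(), inference.GenerateRequest{
		Model:     "llama3", // placeholder model name
		Prompt:    "Summarize the benefits of local inference.",
		MaxTokens: 256,
	})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(resp.Text)
}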

type InferenceService

type InferenceService interface {
	Generate(ctx context.Context, req GenerateRequest) (*GenerateResponse, error)
	Available() bool
}

InferenceService defines the interface for LLM inference
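
Because every backend in this package satisfies the interface, calling code can stay backend-agnostic. A sketch, with a hypothetical import path and placeholder model name:

package main

import (
	"context"
	"fmt"
	"log"

	"example.com/yourmodule/inference" // hypothetical import path
)

// summarize accepts any InferenceService, so Ollama, vLLM, HuggingFace,
// or the mock can be swapped in without changing this function.
func summarize(ctx context.Context, svc inference.InferenceService, text string) (string, error) {
	if !svc.Available() {
		return "", fmt.Errorf("inference service unavailable")
	}
	resp, err := svc.Generate(ctx, inference.GenerateRequest{
		Model:     "llama3", // placeholder model name
		Prompt:    "Summarize:\n" + text,
		MaxTokens: 256,
	})
	if err != nil {
		return "", err
	}
	return resp.Text, nil
}

func main() {
	svc := inference.NewMockInferenceService("test-model")
	out, err := summarize(context.Background(), svc, "Interfaces keep the calling code backend-agnostic.")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(out)
}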

type MockInferenceService

type MockInferenceService struct {
	// contains filtered or unexported fields
}

MockInferenceService provides a mock inference service for testing and development. This should be replaced with real implementations (Ollama, vLLM, HuggingFace API, etc.)

func NewMockInferenceService

func NewMockInferenceService(model string) *MockInferenceService

NewMockInferenceService creates a new mock inference service

func (*MockInferenceService) Available

func (m *MockInferenceService) Available() bool

Available returns whether the service is available

func (*MockInferenceService) Generate

func (m *MockInferenceService) Generate(ctx context.Context, req GenerateRequest) (*GenerateResponse, error)

Generate returns a mock response

func (*MockInferenceService) SetAvailable

func (m *MockInferenceService) SetAvailable(available bool)

SetAvailable sets the availability status (for testing)
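
A sketch of using the mock in a test, with SetAvailable simulating an outage. The final assertion assumes the mock returns non-empty text, which the docs do not guarantee:

package app_test

import (
	"context"
	"testing"

	"example.com/yourmodule/inference" // hypothetical import path
)

func TestMockInference(t *testing.T) {
	mock := inference.NewMockInferenceService("test-model")

	mock.SetAvailable(false) // simulate an outage
	if mock.Available() {
		t.Fatal("expected mock to report unavailable")
	}

	mock.SetAvailable(true)
	resp, err := mock.Generate(context.Background(), inference.GenerateRequest{Prompt: "ping"})
	if err != nil {
		t.Fatalf("Generate: %v", err)
	}
	if resp.Text == "" {
		t.Error("expected a non-empty mock response") // assumes the mock echoes something back
	}
}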

type ModelInfo

type ModelInfo struct {
	Name string
	Size int64
}

ModelInfo represents model information

type OllamaService

type OllamaService struct {
	// contains filtered or unexported fields
}

OllamaService implements InferenceService for Ollama

func NewOllamaService

func NewOllamaService(baseURL string) (*OllamaService, error)

NewOllamaService creates a new Ollama inference service with SSRF protection

func (*OllamaService) Available

func (o *OllamaService) Available() bool

Available checks if Ollama is available

func (*OllamaService) Chat

func (o *OllamaService) Chat(ctx context.Context, model string, messages []ChatMessage) (*GenerateResponse, error)

Chat performs chat completion using Ollama

func (*OllamaService) Generate

func (o *OllamaService) Generate(ctx context.Context, req GenerateRequest) (*GenerateResponse, error)

Generate performs inference using Ollama

func (*OllamaService) ListModels

func (o *OllamaService) ListModels(ctx context.Context) ([]ModelInfo, error)

ListModels returns available models
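
A sketch of listing models and running a chat turn against a local Ollama instance; the base URL and model name are assumptions, not package defaults:

package main

import (
	"context"
	"fmt"
	"log"

	"example.com/yourmodule/inference" // hypothetical import path
)

func main() {
	svc, err := inference.NewOllamaService("http://localhost:11434") // assumed default Ollama address
	if err != nil {
		log.Fatal(err) // e.g. the URL failed SSRF validation
	}

	models, err := svc.ListModels(context.Background())
	if err != nil {
		log.Fatal(err)
	}
	for _, m := range models {
		fmt.Printf("%s (%d bytes)\n", m.Name, m.Size)
	}

	resp, err := svc.Chat(context.Background(), "llama3", []inference.ChatMessage{
		{Role: "system", Content: "You are a terse assistant."},
		{Role: "user", Content: "What is a goroutine?"},
	})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(resp.Text)
}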

type Usage

type Usage struct {
	PromptTokens     int
	CompletionTokens int
	TotalTokens      int
}

Usage represents token usage information

type VLLMService

type VLLMService struct {
	// contains filtered or unexported fields
}

VLLMService implements InferenceService for vLLM (OpenAI-compatible API)

func NewVLLMService

func NewVLLMService(baseURL, apiKey string) *VLLMService

NewVLLMService creates a new vLLM inference service

func (*VLLMService) Available

func (v *VLLMService) Available() bool

Available checks if vLLM is available

func (*VLLMService) BatchGenerate

func (v *VLLMService) BatchGenerate(ctx context.Context, reqs []GenerateRequest) ([]*GenerateResponse, error)

BatchGenerate performs batch inference

func (*VLLMService) ChatCompletion

func (v *VLLMService) ChatCompletion(ctx context.Context, model string, messages []ChatMessage, maxTokens int) (*GenerateResponse, error)

ChatCompletion performs chat completion using vLLM

func (*VLLMService) Generate

func (v *VLLMService) Generate(ctx context.Context, req GenerateRequest) (*GenerateResponse, error)

Generate performs inference using vLLM completions endpoint

func (*VLLMService) ListModels

func (v *VLLMService) ListModels(ctx context.Context) ([]ModelInfo, error)

ListModels returns available models from vLLM
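
A sketch against a vLLM server's OpenAI-compatible API; the base URL, empty API key, and model name are placeholders:

package main

import (
	"context"
	"fmt"
	"log"

	"example.com/yourmodule/inference" // hypothetical import path
)

func main() {
	svc := inference.NewVLLMService("http://localhost:8000", "") // assumed local vLLM server, no API key

	resp, err := svc.ChatCompletion(context.Background(),
		"meta-llama/Llama-3-8B-Instruct", // placeholder model name
		[]inference.ChatMessage{
			{Role: "user", Content: "Give one use case for batch inference."},
		}, 128)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(resp.Text)
	fmt.Printf("tokens: prompt=%d completion=%d total=%d\n",
		resp.Usage.PromptTokens, resp.Usage.CompletionTokens, resp.Usage.TotalTokens)
}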
