Documentation ¶
Index ¶
- type ChatMessage
- type GenerateRequest
- type GenerateResponse
- type HuggingFaceService
- func NewHuggingFaceService(endpoint, token string) *HuggingFaceService
- func (h *HuggingFaceService) Available() bool
- func (h *HuggingFaceService) Generate(ctx context.Context, req GenerateRequest) (*GenerateResponse, error)
- func (h *HuggingFaceService) TextGenerationInference(ctx context.Context, endpoint string, req GenerateRequest) (*GenerateResponse, error)
- type HybridInference
- func NewHybridInference(local, cloud InferenceService) *HybridInference
- func (h *HybridInference) Available() bool
- func (h *HybridInference) Generate(ctx context.Context, req GenerateRequest) (*GenerateResponse, error)
- func (h *HybridInference) SetPreferLocal(prefer bool)
- type InferenceService
- type MockInferenceService
- func NewMockInferenceService(model string) *MockInferenceService
- func (m *MockInferenceService) Available() bool
- func (m *MockInferenceService) Generate(ctx context.Context, req GenerateRequest) (*GenerateResponse, error)
- func (m *MockInferenceService) SetAvailable(available bool)
- type ModelInfo
- type OllamaService
- func NewOllamaService(baseURL string) *OllamaService
- func (o *OllamaService) Available() bool
- func (o *OllamaService) Chat(ctx context.Context, model string, messages []ChatMessage) (*GenerateResponse, error)
- func (o *OllamaService) Generate(ctx context.Context, req GenerateRequest) (*GenerateResponse, error)
- func (o *OllamaService) ListModels(ctx context.Context) ([]ModelInfo, error)
- type Usage
- type VLLMService
- func NewVLLMService(baseURL, apiKey string) *VLLMService
- func (v *VLLMService) Available() bool
- func (v *VLLMService) BatchGenerate(ctx context.Context, reqs []GenerateRequest) ([]*GenerateResponse, error)
- func (v *VLLMService) ChatCompletion(ctx context.Context, model string, messages []ChatMessage, maxTokens int) (*GenerateResponse, error)
- func (v *VLLMService) Generate(ctx context.Context, req GenerateRequest) (*GenerateResponse, error)
- func (v *VLLMService) ListModels(ctx context.Context) ([]ModelInfo, error)
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type ChatMessage ¶
ChatMessage represents a chat message
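The struct body is not shown above, so the field names below are an assumption: chat APIs conventionally carry a role and a content string, and both OllamaService.Chat and VLLMService.ChatCompletion accept a []ChatMessage. A minimal sketch:

messages := []ChatMessage{
    {Role: "system", Content: "You are a concise assistant."}, // Role and Content are assumed field names
    {Role: "user", Content: "What is a goroutine?"},
}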
type GenerateRequest ¶
type GenerateRequest struct {
    Model       string
    Prompt      string
    MaxTokens   int
    Temperature float64
    Stop        []string
}
GenerateRequest represents an inference request
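Since every field is exported, a request is just a struct literal; the model name and stop sequence below are illustrative:

req := GenerateRequest{
    Model:       "llama3", // illustrative model name
    Prompt:      "Write a haiku about concurrency.",
    MaxTokens:   128,
    Temperature: 0.7,
    Stop:        []string{"\n\n"}, // stop at the first blank line
}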
type GenerateResponse ¶
GenerateResponse represents an inference response
type HuggingFaceService ¶
type HuggingFaceService struct {
    // contains filtered or unexported fields
}
HuggingFaceService implements InferenceService for HuggingFace Inference API
func NewHuggingFaceService ¶
func NewHuggingFaceService(endpoint, token string) *HuggingFaceService
NewHuggingFaceService creates a new HuggingFace inference service. It uses the HF_TOKEN environment variable if token is empty.
func (*HuggingFaceService) Available ¶
func (h *HuggingFaceService) Available() bool
Available checks if HuggingFace API is available
func (*HuggingFaceService) Generate ¶
func (h *HuggingFaceService) Generate(ctx context.Context, req GenerateRequest) (*GenerateResponse, error)
Generate performs text generation using HuggingFace Inference API
func (*HuggingFaceService) TextGenerationInference ¶
func (h *HuggingFaceService) TextGenerationInference(ctx context.Context, endpoint string, req GenerateRequest) (*GenerateResponse, error)
TextGenerationInference sends request to a dedicated TGI endpoint
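A sketch of the typical call sequence; the endpoint URL and model name are illustrative, and since the fields of GenerateResponse are not documented above, the response is left opaque here:

// Empty token: the service falls back to the HF_TOKEN environment variable.
hf := NewHuggingFaceService("https://api-inference.huggingface.co/models/gpt2", "")
if !hf.Available() {
    log.Fatal("HuggingFace Inference API is not reachable")
}
resp, err := hf.Generate(context.Background(), GenerateRequest{
    Model:     "gpt2", // illustrative
    Prompt:    "Once upon a time",
    MaxTokens: 64,
})
if err != nil {
    log.Fatal(err)
}
_ = resp // inspect GenerateResponse fields as defined in the source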
type HybridInference ¶
type HybridInference struct {
    // contains filtered or unexported fields
}
HybridInference provides inference with fallback strategy
func NewHybridInference ¶
func NewHybridInference(local, cloud InferenceService) *HybridInference
NewHybridInference creates a new hybrid inference service
func (*HybridInference) Available ¶
func (h *HybridInference) Available() bool
Available returns true if any inference service is available
func (*HybridInference) Generate ¶
func (h *HybridInference) Generate(ctx context.Context, req GenerateRequest) (*GenerateResponse, error)
Generate attempts local inference first and falls back to the cloud service on failure
func (*HybridInference) SetPreferLocal ¶
func (h *HybridInference) SetPreferLocal(prefer bool)
SetPreferLocal sets whether to prefer local inference
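A sketch of the fallback wiring; the Ollama address is that server's conventional default, and the cloud endpoint and model name are illustrative:

local := NewOllamaService("http://localhost:11434")
cloud := NewHuggingFaceService("https://api-inference.huggingface.co/models/gpt2", "")
hybrid := NewHybridInference(local, cloud)
hybrid.SetPreferLocal(true) // try the local service before falling back to cloud

resp, err := hybrid.Generate(context.Background(), GenerateRequest{
    Model:     "llama3", // illustrative
    Prompt:    "Hello",
    MaxTokens: 32,
})
if err != nil {
    log.Fatal(err)
}
_ = resp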
type InferenceService ¶
type InferenceService interface {
    Generate(ctx context.Context, req GenerateRequest) (*GenerateResponse, error)
    Available() bool
}
InferenceService defines the interface for LLM inference
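Since every service above satisfies this two-method interface, calling code can stay backend-agnostic. A sketch, using a hypothetical summarize helper:

// summarize works against any backend that implements InferenceService.
func summarize(ctx context.Context, svc InferenceService, text string) (*GenerateResponse, error) {
    if !svc.Available() {
        return nil, errors.New("inference service unavailable")
    }
    return svc.Generate(ctx, GenerateRequest{
        Prompt:    "Summarize the following text:\n" + text,
        MaxTokens: 256,
    })
}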
type MockInferenceService ¶
type MockInferenceService struct {
    // contains filtered or unexported fields
}
MockInferenceService provides a mock inference service for testing and development. It should be replaced with a real implementation (Ollama, vLLM, HuggingFace API, etc.) in production.
func NewMockInferenceService ¶
func NewMockInferenceService(model string) *MockInferenceService
NewMockInferenceService creates a new mock inference service
func (*MockInferenceService) Available ¶
func (m *MockInferenceService) Available() bool
Available returns whether the service is available
func (*MockInferenceService) Generate ¶
func (m *MockInferenceService) Generate(ctx context.Context, req GenerateRequest) (*GenerateResponse, error)
Generate returns a mock response
func (*MockInferenceService) SetAvailable ¶
func (m *MockInferenceService) SetAvailable(available bool)
SetAvailable sets the availability status (for testing)
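In tests, the mock drops in wherever an InferenceService is expected, and SetAvailable exercises the failure path. A sketch, reusing the hypothetical summarize helper from above:

func TestSummarizeUnavailable(t *testing.T) {
    mock := NewMockInferenceService("test-model")
    mock.SetAvailable(false) // simulate an offline backend
    if _, err := summarize(context.Background(), mock, "some text"); err == nil {
        t.Fatal("expected an error while the service is unavailable")
    }
}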
type OllamaService ¶
type OllamaService struct {
    // contains filtered or unexported fields
}
OllamaService implements InferenceService for Ollama
func NewOllamaService ¶
func NewOllamaService(baseURL string) *OllamaService
NewOllamaService creates a new Ollama inference service
func (*OllamaService) Available ¶
func (o *OllamaService) Available() bool
Available checks if Ollama is available
func (*OllamaService) Chat ¶
func (o *OllamaService) Chat(ctx context.Context, model string, messages []ChatMessage) (*GenerateResponse, error)
Chat performs chat completion using Ollama
func (*OllamaService) Generate ¶
func (o *OllamaService) Generate(ctx context.Context, req GenerateRequest) (*GenerateResponse, error)
Generate performs inference using Ollama
func (*OllamaService) ListModels ¶
func (o *OllamaService) ListModels(ctx context.Context) ([]ModelInfo, error)
ListModels returns the models available from Ollama
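A sketch of a chat round-trip against a local Ollama server; the address is Ollama's conventional default, the model name is illustrative, and ChatMessage's Role/Content fields are the assumption noted earlier:

ollama := NewOllamaService("http://localhost:11434")
models, err := ollama.ListModels(context.Background())
if err != nil {
    log.Fatal(err)
}
log.Printf("%d models installed", len(models))

resp, err := ollama.Chat(context.Background(), "llama3", []ChatMessage{
    {Role: "user", Content: "Why is the sky blue?"},
})
if err != nil {
    log.Fatal(err)
}
_ = resp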
type VLLMService ¶
type VLLMService struct {
    // contains filtered or unexported fields
}
VLLMService implements InferenceService for vLLM (OpenAI-compatible API)
func NewVLLMService ¶
func NewVLLMService(baseURL, apiKey string) *VLLMService
NewVLLMService creates a new vLLM inference service
func (*VLLMService) Available ¶
func (v *VLLMService) Available() bool
Available checks if vLLM is available
func (*VLLMService) BatchGenerate ¶
func (v *VLLMService) BatchGenerate(ctx context.Context, reqs []GenerateRequest) ([]*GenerateResponse, error)
BatchGenerate performs batch inference
func (*VLLMService) ChatCompletion ¶
func (v *VLLMService) ChatCompletion(ctx context.Context, model string, messages []ChatMessage, maxTokens int) (*GenerateResponse, error)
ChatCompletion performs chat completion using vLLM
func (*VLLMService) Generate ¶
func (v *VLLMService) Generate(ctx context.Context, req GenerateRequest) (*GenerateResponse, error)
Generate performs inference using vLLM completions endpoint
func (*VLLMService) ListModels ¶
func (v *VLLMService) ListModels(ctx context.Context) ([]ModelInfo, error)
ListModels returns available models from vLLM
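A sketch against a vLLM server's OpenAI-compatible API; the base URL uses vLLM's conventional default port, and the API key handling and model name are illustrative:

vllm := NewVLLMService("http://localhost:8000", os.Getenv("VLLM_API_KEY"))
if !vllm.Available() {
    log.Fatal("vLLM server is not reachable")
}

// Several prompts can be batched into one call.
reqs := []GenerateRequest{
    {Model: "mistralai/Mistral-7B-Instruct-v0.2", Prompt: "Define RAG in one sentence.", MaxTokens: 64},
    {Model: "mistralai/Mistral-7B-Instruct-v0.2", Prompt: "Define LoRA in one sentence.", MaxTokens: 64},
}
resps, err := vllm.BatchGenerate(context.Background(), reqs)
if err != nil {
    log.Fatal(err)
}
log.Printf("got %d responses", len(resps))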