Documentation ¶
Index ¶
- type ChatMessage
- type GenerateRequest
- type GenerateResponse
- type HuggingFaceService
- func NewHuggingFaceService(endpoint, token string) *HuggingFaceService
- func (h *HuggingFaceService) Available() bool
- func (h *HuggingFaceService) Generate(ctx context.Context, req GenerateRequest) (*GenerateResponse, error)
- func (h *HuggingFaceService) TextGenerationInference(ctx context.Context, endpoint string, req GenerateRequest) (*GenerateResponse, error)
- type HybridInference
- func NewHybridInference(local, cloud InferenceService) *HybridInference
- func (h *HybridInference) Available() bool
- func (h *HybridInference) Generate(ctx context.Context, req GenerateRequest) (*GenerateResponse, error)
- func (h *HybridInference) SetPreferLocal(prefer bool)
- type InferenceService
- type MockInferenceService
- func NewMockInferenceService(model string) *MockInferenceService
- func (m *MockInferenceService) Available() bool
- func (m *MockInferenceService) Generate(ctx context.Context, req GenerateRequest) (*GenerateResponse, error)
- func (m *MockInferenceService) SetAvailable(available bool)
- type ModelInfo
- type OllamaService
- func NewOllamaService(baseURL string) *OllamaService
- func (o *OllamaService) Available() bool
- func (o *OllamaService) Chat(ctx context.Context, model string, messages []ChatMessage) (*GenerateResponse, error)
- func (o *OllamaService) Generate(ctx context.Context, req GenerateRequest) (*GenerateResponse, error)
- func (o *OllamaService) ListModels(ctx context.Context) ([]ModelInfo, error)
- type Usage
- type VLLMService
- func NewVLLMService(baseURL, apiKey string) *VLLMService
- func (v *VLLMService) Available() bool
- func (v *VLLMService) BatchGenerate(ctx context.Context, reqs []GenerateRequest) ([]*GenerateResponse, error)
- func (v *VLLMService) ChatCompletion(ctx context.Context, model string, messages []ChatMessage, maxTokens int) (*GenerateResponse, error)
- func (v *VLLMService) Generate(ctx context.Context, req GenerateRequest) (*GenerateResponse, error)
- func (v *VLLMService) ListModels(ctx context.Context) ([]ModelInfo, error)
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type ChatMessage ¶
ChatMessage represents a chat message
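The struct body is not shown above, so the field names below are an assumption: chat APIs conventionally carry a role and a content string, and both OllamaService.Chat and VLLMService.ChatCompletion accept a []ChatMessage. A minimal sketch:

messages := []ChatMessage{
    {Role: "system", Content: "You are a concise assistant."}, // Role and Content are assumed field names
    {Role: "user", Content: "What is a goroutine?"},
}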
type GenerateRequest ¶
type GenerateRequest struct {
    Model       string
    Prompt      string
    MaxTokens   int
    Temperature float64
    Stop        []string
}
GenerateRequest represents an inference request
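Since every field is exported, a request is just a struct literal; the model name and stop sequence below are illustrative:

req := GenerateRequest{
    Model:       "llama3", // illustrative model name
    Prompt:      "Write a haiku about concurrency.",
    MaxTokens:   128,
    Temperature: 0.7,
    Stop:        []string{"\n\n"}, // stop at the first blank line
}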
type GenerateResponse ¶
GenerateResponse represents an inference response
type HuggingFaceService ¶
type HuggingFaceService struct {
    // contains filtered or unexported fields
}
HuggingFaceService implements InferenceService for HuggingFace Inference API
func NewHuggingFaceService ¶
func NewHuggingFaceService(endpoint, token string) *HuggingFaceService
NewHuggingFaceService creates a new HuggingFace inference service. It uses the HF_TOKEN environment variable if token is empty.
func (*HuggingFaceService) Available ¶
func (h *HuggingFaceService) Available() bool
Available checks if HuggingFace API is available
func (*HuggingFaceService) Generate ¶
func (h *HuggingFaceService) Generate(ctx context.Context, req GenerateRequest) (*GenerateResponse, error)
Generate performs text generation using HuggingFace Inference API
func (*HuggingFaceService) TextGenerationInference ¶
func (h *HuggingFaceService) TextGenerationInference(ctx context.Context, endpoint string, req GenerateRequest) (*GenerateResponse, error)
TextGenerationInference sends request to a dedicated TGI endpoint
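A sketch of the typical call sequence; the endpoint URL and model name are illustrative, and since the fields of GenerateResponse are not documented above, the response is left opaque here:

// Empty token: the service falls back to the HF_TOKEN environment variable.
hf := NewHuggingFaceService("https://api-inference.huggingface.co/models/gpt2", "")
if !hf.Available() {
    log.Fatal("HuggingFace Inference API is not reachable")
}
resp, err := hf.Generate(context.Background(), GenerateRequest{
    Model:     "gpt2", // illustrative
    Prompt:    "Once upon a time",
    MaxTokens: 64,
})
if err != nil {
    log.Fatal(err)
}
_ = resp // inspect GenerateResponse fields as defined in the source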
type HybridInference ¶
type HybridInference struct {
    // contains filtered or unexported fields
}
HybridInference provides inference with fallback strategy
func NewHybridInference ¶
func NewHybridInference(local, cloud InferenceService) *HybridInference
NewHybridInference creates a new hybrid inference service
func (*HybridInference) Available ¶
func (h *HybridInference) Available() bool
Available returns true if any inference service is available
func (*HybridInference) Generate ¶
func (h *HybridInference) Generate(ctx context.Context, req GenerateRequest) (*GenerateResponse, error)
Generate attempts local inference first and falls back to the cloud service on failure
func (*HybridInference) SetPreferLocal ¶
func (h *HybridInference) SetPreferLocal(prefer bool)
SetPreferLocal sets whether to prefer local inference
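A sketch of the fallback wiring; the Ollama address is that server's conventional default, and the cloud endpoint and model name are illustrative:

local := NewOllamaService("http://localhost:11434")
cloud := NewHuggingFaceService("https://api-inference.huggingface.co/models/gpt2", "")
hybrid := NewHybridInference(local, cloud)
hybrid.SetPreferLocal(true) // try the local service before falling back to cloud

resp, err := hybrid.Generate(context.Background(), GenerateRequest{
    Model:     "llama3", // illustrative
    Prompt:    "Hello",
    MaxTokens: 32,
})
if err != nil {
    log.Fatal(err)
}
_ = resp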
type InferenceService ¶
type InferenceService interface {
    Generate(ctx context.Context, req GenerateRequest) (*GenerateResponse, error)
    Available() bool
}
InferenceService defines the interface for LLM inference
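Since every service above satisfies this two-method interface, calling code can stay backend-agnostic. A sketch, using a hypothetical summarize helper:

// summarize works against any backend that implements InferenceService.
func summarize(ctx context.Context, svc InferenceService, text string) (*GenerateResponse, error) {
    if !svc.Available() {
        return nil, errors.New("inference service unavailable")
    }
    return svc.Generate(ctx, GenerateRequest{
        Prompt:    "Summarize the following text:\n" + text,
        MaxTokens: 256,
    })
}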
type MockInferenceService ¶
type MockInferenceService struct {
    // contains filtered or unexported fields
}
MockInferenceService provides a mock inference service for testing and development. It should be replaced with a real implementation (Ollama, vLLM, HuggingFace API, etc.) in production.
func NewMockInferenceService ¶
func NewMockInferenceService(model string) *MockInferenceService
NewMockInferenceService creates a new mock inference service
func (*MockInferenceService) Available ¶
func (m *MockInferenceService) Available() bool
Available returns whether the service is available
func (*MockInferenceService) Generate ¶
func (m *MockInferenceService) Generate(ctx context.Context, req GenerateRequest) (*GenerateResponse, error)
Generate returns a mock response
func (*MockInferenceService) SetAvailable ¶
func (m *MockInferenceService) SetAvailable(available bool)
SetAvailable sets the availability status (for testing)
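In tests, the mock drops in wherever an InferenceService is expected, and SetAvailable exercises the failure path. A sketch, reusing the hypothetical summarize helper from above:

func TestSummarizeUnavailable(t *testing.T) {
    mock := NewMockInferenceService("test-model")
    mock.SetAvailable(false) // simulate an offline backend
    if _, err := summarize(context.Background(), mock, "some text"); err == nil {
        t.Fatal("expected an error while the service is unavailable")
    }
}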
type OllamaService ¶
type OllamaService struct {
    // contains filtered or unexported fields
}
OllamaService implements InferenceService for Ollama
func NewOllamaService ¶
func NewOllamaService(baseURL string) *OllamaService
NewOllamaService creates a new Ollama inference service
func (*OllamaService) Available ¶
func (o *OllamaService) Available() bool
Available checks if Ollama is available
func (*OllamaService) Chat ¶
func (o *OllamaService) Chat(ctx context.Context, model string, messages []ChatMessage) (*GenerateResponse, error)
Chat performs chat completion using Ollama
func (*OllamaService) Generate ¶
func (o *OllamaService) Generate(ctx context.Context, req GenerateRequest) (*GenerateResponse, error)
Generate performs inference using Ollama
func (*OllamaService) ListModels ¶
func (o *OllamaService) ListModels(ctx context.Context) ([]ModelInfo, error)
ListModels returns the models available from Ollama
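A sketch of a chat round-trip against a local Ollama server; the address is Ollama's conventional default, the model name is illustrative, and ChatMessage's Role/Content fields are the assumption noted earlier:

ollama := NewOllamaService("http://localhost:11434")
models, err := ollama.ListModels(context.Background())
if err != nil {
    log.Fatal(err)
}
log.Printf("%d models installed", len(models))

resp, err := ollama.Chat(context.Background(), "llama3", []ChatMessage{
    {Role: "user", Content: "Why is the sky blue?"},
})
if err != nil {
    log.Fatal(err)
}
_ = resp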
type VLLMService ¶
type VLLMService struct {
    // contains filtered or unexported fields
}
VLLMService implements InferenceService for vLLM (OpenAI-compatible API)
func NewVLLMService ¶
func NewVLLMService(baseURL, apiKey string) *VLLMService
NewVLLMService creates a new vLLM inference service
func (*VLLMService) Available ¶
func (v *VLLMService) Available() bool
Available checks if vLLM is available
func (*VLLMService) BatchGenerate ¶
func (v *VLLMService) BatchGenerate(ctx context.Context, reqs []GenerateRequest) ([]*GenerateResponse, error)
BatchGenerate performs batch inference
func (*VLLMService) ChatCompletion ¶
func (v *VLLMService) ChatCompletion(ctx context.Context, model string, messages []ChatMessage, maxTokens int) (*GenerateResponse, error)
ChatCompletion performs chat completion using vLLM
func (*VLLMService) Generate ¶
func (v *VLLMService) Generate(ctx context.Context, req GenerateRequest) (*GenerateResponse, error)
Generate performs inference using vLLM completions endpoint
func (*VLLMService) ListModels ¶
func (v *VLLMService) ListModels(ctx context.Context) ([]ModelInfo, error)
ListModels returns available models from vLLM
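A sketch against a vLLM server's OpenAI-compatible API; the base URL uses vLLM's conventional default port, and the API key handling and model name are illustrative:

vllm := NewVLLMService("http://localhost:8000", os.Getenv("VLLM_API_KEY"))
if !vllm.Available() {
    log.Fatal("vLLM server is not reachable")
}

// Several prompts can be batched into one call.
reqs := []GenerateRequest{
    {Model: "mistralai/Mistral-7B-Instruct-v0.2", Prompt: "Define RAG in one sentence.", MaxTokens: 64},
    {Model: "mistralai/Mistral-7B-Instruct-v0.2", Prompt: "Define LoRA in one sentence.", MaxTokens: 64},
}
resps, err := vllm.BatchGenerate(context.Background(), reqs)
if err != nil {
    log.Fatal(err)
}
log.Printf("got %d responses", len(resps))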