Documentation
¶
Index ¶
- Constants
- Variables
- func EnsureONNXRuntimeSharedLib() (string, error)
- func HFHubDownload(repoID, filename string) (string, error)
- func HFHubEnsureFiles(repoID string, files []string) (map[string]string, error)
- func HFHubEnsureOptionalFiles(repoID string, files []string) (map[string]string, error)
- type ChatMessage
- type Config
- func (c *Config) BOS_TOKEN_ID() int64
- func (c *Config) ConvLCache() int
- func (c *Config) EOS_TOKEN_ID() int64
- func (c *Config) HiddenSize() int
- func (c *Config) LayerTypes() []string
- func (c *Config) ModelType() string
- func (c *Config) NumAttentionHeads() int
- func (c *Config) NumHiddenLayers() int
- func (c *Config) NumKeyValueHeads() int
- func (c *Config) PAD_TOKEN_ID() int64
- func (c *Config) Raw() map[string]any
- func (c *Config) StopStrings() []string
- func (c *Config) VocabSize() int
- type GenerationOptions
- type Generator
- type IOPreset
- type MessageRole
- type ModelForCausalLM
- type PipelineStreamEvent
- type Tokenizer
- func (t *Tokenizer) BatchDecode(batch [][]int64) ([]string, error)
- func (t *Tokenizer) Decode(ids []int64) (string, error)
- func (t *Tokenizer) Encode(text string, addSpecialTokens bool) ([]int64, error)
- func (t *Tokenizer) EncodeChat(messages []ChatMessage) (inputIDs [][]int64, attentionMask [][]int64, promptLen int, rawText string, ...)
- func (t *Tokenizer) Info() string
- type ToolDefinition
- type ToolParameter
Constants ¶
const Version = "1.0.5"
Version is bumped automatically by the release workflow on pushes to main.
Variables ¶
var AutoConfig autoConfig
var AutoModelForCausalLM autoModelForCausalLM
var AutoTokenizer autoTokenizer
Functions ¶
func EnsureONNXRuntimeSharedLib ¶
EnsureONNXRuntimeSharedLib downloads (if needed) and sets the path to the platform-appropriate ONNX Runtime shared library. It returns the absolute path configured via onnx.SetSharedLibraryPath.
func HFHubDownload ¶
HFHubDownload downloads a file from a Hugging Face repo into a local cache. Very simple v1: no auth, no revision. Cache dir can be overridden with CACHE_DIR env; default: ./models/huggingface.co/<repoID>/resolve/main/
func HFHubEnsureFiles ¶
HFHubEnsureFiles checks (via HEAD) and downloads a set of files into the cache. Returns a map of filename -> local path.
Types ¶
type ChatMessage ¶
type ChatMessage struct {
Role MessageRole `json:"role"`
Content string `json:"content"`
Name string `json:"name,omitempty"`
ToolCallID string `json:"tool_call_id,omitempty"`
}
type Config ¶
type Config struct {
// contains filtered or unexported fields
}
Config holds model configuration loaded from config.json.
func (*Config) BOS_TOKEN_ID ¶
func (*Config) ConvLCache ¶
func (*Config) EOS_TOKEN_ID ¶
func (*Config) HiddenSize ¶
func (*Config) LayerTypes ¶
func (*Config) NumAttentionHeads ¶
func (*Config) NumHiddenLayers ¶
func (*Config) NumKeyValueHeads ¶
func (*Config) PAD_TOKEN_ID ¶
func (*Config) StopStrings ¶
type GenerationOptions ¶
type GenerationOptions struct {
MaxNewTokens int
DoSample bool
Streamer func(ev PipelineStreamEvent) bool // return false to stop early
StopSequences []string
}
GenerationOptions describes generation parameters for a call.
type Generator ¶
Generator is what Pipeline(...) returns. It mirrors the JS/Python pattern: generator(messages, options) -> output.
func Pipeline ¶
Pipeline is the exported HF-style entry point:
generator, err := Pipeline("text-generation", modelID, map[string]any{"dtype": "q4"})
Internally it delegates to the lowercase pipelineImpl, so you can define a small-p alias in your own code if you dot-import the package:
var pipeline = transformers.Pipeline
type IOPreset ¶
type IOPreset int
IOPreset describes how we intend to wire inputs/outputs for a model.
type MessageRole ¶
type MessageRole string
const (
	RoleSystem    MessageRole = "system"
	RoleUser      MessageRole = "user"
	RoleAssistant MessageRole = "assistant"
	RoleTool      MessageRole = "tool"
)
type ModelForCausalLM ¶
type ModelForCausalLM struct {
// contains filtered or unexported fields
}
ModelForCausalLM is our ONNX-backed language model wrapper.
func (*ModelForCausalLM) Generate ¶
func (m *ModelForCausalLM) Generate(
	tokenizer *Tokenizer,
	inputIDs [][]int64,
	attentionMask [][]int64,
	opts GenerationOptions,
) ([][]int64, error)
Generate runs a chat-style generation loop with optional streaming. It currently supports batch=1 only.
type PipelineStreamEvent ¶
type PipelineStreamEvent struct {
TokenID int64
DeltaText string
FullText string
Step int
Done bool
}
PipelineStreamEvent is the streaming event passed to user callbacks when the "streamer" option is set.
type Tokenizer ¶
type Tokenizer struct {
// contains filtered or unexported fields
}
Tokenizer wraps sugarme/tokenizer with a HF-like interface.
func (*Tokenizer) BatchDecode ¶
BatchDecode decodes each sequence of token IDs in batch, returning one decoded string per input sequence.
func (*Tokenizer) EncodeChat ¶
func (t *Tokenizer) EncodeChat( messages []ChatMessage, ) (inputIDs [][]int64, attentionMask [][]int64, promptLen int, rawText string, err error)
EncodeChat encodes the full chat into input IDs and attention mask.
type ToolDefinition ¶
type ToolDefinition struct {
Name string `json:"name"`
Description string `json:"description,omitempty"`
Parameters ToolParameter `json:"parameters"`
}
type ToolParameter ¶
type ToolParameter struct {
Type string `json:"type"`
Description string `json:"description,omitempty"`
Enum []string `json:"enum,omitempty"`
Properties map[string]ToolParameter `json:"properties,omitempty"`
Required []string `json:"required,omitempty"`
}
Tool schema types – kept for future use; v1 does not yet embed tools into the prompt.