Documentation
¶
Index ¶
- func AllInputTokens(pipeline *BasePipeline)
- func CreateImageTensors(batch *PipelineBatch, model *Model, preprocessed [][][][]float32, ...) error
- func CreateInputTensors(batch *PipelineBatch, model *Model, runtime string) error
- func CreateInputTensorsTraining(batch *PipelineBatch, model *Model, runtime string) error
- func CreateMessages(batch *PipelineBatch, p *BasePipeline, inputs any, systemPrompt string) error
- func CreateMessagesORT(_ *PipelineBatch, _ any, _ string) error
- func CreateModelBackend(model *Model, s *options.Options) error
- func CreateTabularTensors(batch *PipelineBatch, model *Model, features [][]float32, runtime string) error
- func Decode(tokens []uint32, tokenizer *Tokenizer, skipSpecialTokens bool) (string, error)
- func DetectImageTensorFormat(model *Model) (string, error)
- func GetNames(info []InputOutputInfo) []string
- func GetOnnxModelPath(model *Model) error
- func LoadTokenizer(model *Model, s *options.Options) error
- func PreprocessImages(format string, images []image.Image, preprocess []imageutil.PreprocessStep, ...) ([][][][]float32, error)
- func ReshapeOutput[T float32 | int64 | int32](input []T, meta InputOutputInfo, batchSize int, paddingMask [][]bool, ...) any
- func RunGenerativeSessionOnBatch(ctx context.Context, batch *PipelineBatch, p *BasePipeline, maxLength int, ...) (chan SequenceDelta, chan error, error)
- func RunSessionOnBatch(batch *PipelineBatch, p *BasePipeline) error
- func TokenizeInputs(batch *PipelineBatch, tk *Tokenizer, inputs []string)
- type BasePipeline
- type GoMLXModel
- type GoTokenizer
- type InputOutputInfo
- type Message
- type Model
- type ORTModel
- type OutputInfo
- type Pipeline
- type PipelineBatch
- type PipelineBatchOutput
- type PipelineConfig
- type PipelineMetadata
- type PipelineOption
- type PipelineStatistics
- type RustTokenizer
- type SequenceDelta
- type Shape
- type TokenizedInput
- type Tokenizer
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func AllInputTokens ¶
func AllInputTokens(pipeline *BasePipeline)
func CreateImageTensors ¶
func CreateImageTensors(batch *PipelineBatch, model *Model, preprocessed [][][][]float32, runtime string) error
func CreateInputTensors ¶
func CreateInputTensors(batch *PipelineBatch, model *Model, runtime string) error
func CreateInputTensorsTraining ¶
func CreateInputTensorsTraining(batch *PipelineBatch, model *Model, runtime string) error
CreateInputTensorsTraining creates input tensors for training. Same as CreateInputTensors but we never pad the batch size as we expect regular batch sizes from the dataset.
func CreateMessages ¶ added in v0.6.0
func CreateMessages(batch *PipelineBatch, p *BasePipeline, inputs any, systemPrompt string) error
func CreateMessagesORT ¶ added in v0.6.0
func CreateMessagesORT(_ *PipelineBatch, _ any, _ string) error
func CreateTabularTensors ¶ added in v0.6.2
func CreateTabularTensors(batch *PipelineBatch, model *Model, features [][]float32, runtime string) error
CreateTabularTensors builds input tensors for classic ML/tabular models.
func Decode ¶
func Decode(tokens []uint32, tokenizer *Tokenizer, skipSpecialTokens bool) (string, error)
func DetectImageTensorFormat ¶ added in v0.6.0
func DetectImageTensorFormat(model *Model) (string, error)
DetectImageTensorFormat inspects the first image-like input and infers NHWC or NCHW.
func GetNames ¶
func GetNames(info []InputOutputInfo) []string
func GetOnnxModelPath ¶ added in v0.6.2
func GetOnnxModelPath(model *Model) error
func PreprocessImages ¶ added in v0.6.0
func PreprocessImages(format string, images []image.Image, preprocess []imageutil.PreprocessStep, normalize []imageutil.NormalizationStep) ([][][][]float32, error)
PreprocessImages preprocesses images into a 4D tensor slice according to format and steps.
func ReshapeOutput ¶
func RunGenerativeSessionOnBatch ¶
func RunGenerativeSessionOnBatch(ctx context.Context, batch *PipelineBatch, p *BasePipeline, maxLength int, stopSequences []string, temperature *float64, topP *float64, seed *int) (chan SequenceDelta, chan error, error)
func RunSessionOnBatch ¶
func RunSessionOnBatch(batch *PipelineBatch, p *BasePipeline) error
func TokenizeInputs ¶
func TokenizeInputs(batch *PipelineBatch, tk *Tokenizer, inputs []string)
Types ¶
type BasePipeline ¶
type BasePipeline struct {
Model *Model
PipelineTimings *timings
PipelineName string
Runtime string
}
BasePipeline can be embedded by a pipeline.
func NewBasePipeline ¶
func NewBasePipeline[T Pipeline](config PipelineConfig[T], s *options.Options, model *Model) (*BasePipeline, error)
type GoMLXModel ¶
type GoMLXModel struct {
Backend backends.Backend
OnnxModel onnx.Model
Ctx *context.Context // ctx with the model's weights.
Exec *context.Exec // exec is used to execute the model with a context.
Call func(ctx *context.Context, inputs []*graph.Node) []*graph.Node
Destroy func()
BatchBuckets []int // BatchBuckets defines bucket sizes for batch dimension padding.
SequenceBuckets []int // SequenceBuckets defines bucket sizes for sequence length padding.
MaxCache int // MaxCache sets the maximum number of unique input shapes to cache.
}
type GoTokenizer ¶
type InputOutputInfo ¶
type Message ¶ added in v0.6.0
type Message struct {
Role string `json:"role"`
Content string `json:"content"`
ImageURLs []string `json:"image_urls,omitempty"` // File paths or data URIs for multimodal support
}
Message represents a single message in a conversation. Images can be included via ImageURLs for multimodal models.
type Model ¶
type Model struct {
ID string
ORTModel *ORTModel
GoMLXModel *GoMLXModel
Tokenizer *Tokenizer
Destroy func() error
Pipelines map[string]Pipeline
IDLabelMap map[int]string
SeparatorToken string
Path string
OnnxFilename string
OnnxPath string
OnnxReader io.ReadCloser
InputsMeta []InputOutputInfo
OutputsMeta []InputOutputInfo
MaxPositionEmbeddings int
IsGenerative bool
}
type ORTModel ¶
type ORTModel struct {
Destroy func() error
GenerativeSession disabledGenerativeSession // placeholder when ORT disabled
}
type OutputInfo ¶
type Pipeline ¶
type Pipeline interface {
GetStatistics() PipelineStatistics // Get the pipeline running statistics
Validate() error // Validate the pipeline for correctness
GetMetadata() PipelineMetadata // Return metadata information for the pipeline
GetModel() *Model // Return the model used by the pipeline
IsGenerative() bool // Return whether the pipeline is generative
Run([]string) (PipelineBatchOutput, error) // Run the pipeline on an input
}
Pipeline is the interface that any pipeline must implement.
type PipelineBatch ¶
type PipelineBatch struct {
InputValues any
DestroyInputs func() error
Input []TokenizedInput
PaddingMask [][]bool
OutputValues []any
Size int
MaxSequenceLength int
MaxNewTokens int
// Multimodal support
Images any // Will hold *ortgenai.Images for generative models
DestroyMultimodal func() error
}
PipelineBatch represents a batch of inputs that runs through the pipeline.
func NewBatch ¶
func NewBatch(size int) *PipelineBatch
NewBatch initializes a new batch for inference.
func (*PipelineBatch) Destroy ¶
func (b *PipelineBatch) Destroy() error
type PipelineBatchOutput ¶
type PipelineBatchOutput interface {
GetOutput() []any
}
type PipelineConfig ¶
type PipelineConfig[T Pipeline] struct {
ModelPath string
Name string
OnnxFilename string
Options []PipelineOption[T]
}
PipelineConfig is a configuration for a pipeline type that can be used to create that pipeline.
type PipelineMetadata ¶
type PipelineMetadata struct {
OutputsInfo []OutputInfo
}
type PipelineOption ¶
PipelineOption is an option for a pipeline type.
type PipelineStatistics ¶
type PipelineStatistics struct {
TokenizerTotalTime time.Duration
TokenizerExecutionCount uint64
TokenizerAvgQueryTime time.Duration
OnnxTotalTime time.Duration
OnnxExecutionCount uint64
OnnxAvgQueryTime time.Duration
TotalQueries uint64
TotalDocuments uint64
AverageLatency time.Duration
AverageBatchSize float64
FilteredResults uint64
AvgPrefillSeconds float64
TokensPerSecond float64
CumulativePrefillSum float64
CumulativePrefillCount int
CumulativeTokens int
CumulativeTokenDurationSeconds float64
}
func (*PipelineStatistics) ComputeOnnxStatistics ¶
func (p *PipelineStatistics) ComputeOnnxStatistics(timings *timings)
func (*PipelineStatistics) ComputeTokenizerStatistics ¶
func (p *PipelineStatistics) ComputeTokenizerStatistics(timings *timings)
func (*PipelineStatistics) Print ¶
func (p *PipelineStatistics) Print()
type RustTokenizer ¶
type RustTokenizer struct{}
type SequenceDelta ¶ added in v0.6.0
type Shape ¶
type Shape []int64
type TokenizedInput ¶
type TokenizedInput struct {
Raw string
Tokens []string
TokenIDs []uint32
TypeIDs []uint32
AttentionMask []uint32
SpecialTokensMask []uint32
Offsets [][2]uint
WordIDs []int
MaxAttentionIndex int
}
TokenizedInput holds the result of running tokenizer on an input.
type Tokenizer ¶
type Tokenizer struct {
RustTokenizer *RustTokenizer
GoTokenizer *GoTokenizer
TokenizerTimings *timings
Destroy func() error
Runtime string
MaxAllowedTokens int
}