backends

package
v0.6.5 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 17, 2026 License: Apache-2.0 Imports: 25 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func AllInputTokens

func AllInputTokens(pipeline *BasePipeline)

func CreateImageTensors

func CreateImageTensors(batch *PipelineBatch, model *Model, preprocessed [][][][]float32, runtime string) error

func CreateInputTensors

func CreateInputTensors(batch *PipelineBatch, model *Model, runtime string) error

func CreateInputTensorsTraining

func CreateInputTensorsTraining(batch *PipelineBatch, model *Model, runtime string) error

CreateInputTensorsTraining creates input tensors for training. Same as CreateInputTensors but we never pad the batch size as we expect regular batch sizes from the dataset.

func CreateMessages added in v0.6.0

func CreateMessages(batch *PipelineBatch, p *BasePipeline, inputs any, systemPrompt string) error

func CreateMessagesORT added in v0.6.0

func CreateMessagesORT(_ *PipelineBatch, _ any, _ string) error

func CreateModelBackend

func CreateModelBackend(model *Model, s *options.Options) error

func CreateTabularTensors added in v0.6.2

func CreateTabularTensors(batch *PipelineBatch, model *Model, features [][]float32, runtime string) error

CreateTabularTensors builds input tensors for classic ML/tabular models.

func Decode

func Decode(tokens []uint32, tokenizer *Tokenizer, skipSpecialTokens bool) (string, error)

func DetectImageTensorFormat added in v0.6.0

func DetectImageTensorFormat(model *Model) (string, error)

DetectImageTensorFormat inspects the first image-like input and infers NHWC or NCHW.

func GetNames

func GetNames(info []InputOutputInfo) []string

func GetOnnxModelPath added in v0.6.2

func GetOnnxModelPath(model *Model) error

func LoadTokenizer

func LoadTokenizer(model *Model, s *options.Options) error

func PreprocessImages added in v0.6.0

func PreprocessImages(format string, images []image.Image, preprocess []imageutil.PreprocessStep, normalize []imageutil.NormalizationStep) ([][][][]float32, error)

PreprocessImages preprocesses images into a 4D tensor slice according to format and steps.

func ReshapeOutput

func ReshapeOutput[T float32 | int64 | int32](input []T, meta InputOutputInfo, batchSize int, paddingMask [][]bool, sequenceLength int) any

func RunGenerativeSessionOnBatch

func RunGenerativeSessionOnBatch(ctx context.Context, batch *PipelineBatch, p *BasePipeline, maxLength int, stopSequences []string, temperature *float64, topP *float64, seed *int) (chan SequenceDelta, chan error, error)

func RunSessionOnBatch

func RunSessionOnBatch(batch *PipelineBatch, p *BasePipeline) error

func TokenizeInputs

func TokenizeInputs(batch *PipelineBatch, tk *Tokenizer, inputs []string)

Types

type BasePipeline

type BasePipeline struct {
	Model           *Model
	PipelineTimings *timings
	PipelineName    string
	Runtime         string
}

BasePipeline can be embedded by a pipeline.

func NewBasePipeline

func NewBasePipeline[T Pipeline](config PipelineConfig[T], s *options.Options, model *Model) (*BasePipeline, error)

type GoMLXModel

type GoMLXModel struct {
	Backend         backends.Backend
	OnnxModel       onnx.Model
	Ctx             *context.Context // ctx with the model's weights.
	Exec            *context.Exec    // exec is used to execute the model with a context.
	Call            func(ctx *context.Context, inputs []*graph.Node) []*graph.Node
	Destroy         func()
	BatchBuckets    []int // BatchBuckets defines bucket sizes for batch dimension padding.
	SequenceBuckets []int // SequenceBuckets defines bucket sizes for sequence length padding.
	MaxCache        int   // MaxCache sets the maximum number of unique input shapes to cache.
}

func (*GoMLXModel) Save

func (goMLXModel *GoMLXModel) Save(w io.Writer) error

type GoTokenizer

type GoTokenizer struct {
	Tokenizer *tokenizer.Tokenizer
}

type InputOutputInfo

type InputOutputInfo struct {
	// The name of the input or output
	Name string
	// The input or output's dimensions, if it's a tensor. This should be
	// ignored for non-tensor types.
	Dimensions Shape
}

type Message added in v0.6.0

type Message struct {
	Role      string   `json:"role"`
	Content   string   `json:"content"`
	ImageURLs []string `json:"image_urls,omitempty"` // File paths or data URIs for multimodal support
}

Message represents a single message in a conversation. Images can be included via ImageURLs for multimodal models.

type Model

type Model struct {
	ID                    string
	ORTModel              *ORTModel
	GoMLXModel            *GoMLXModel
	Tokenizer             *Tokenizer
	Destroy               func() error
	Pipelines             map[string]Pipeline
	IDLabelMap            map[int]string
	SeparatorToken        string
	Path                  string
	OnnxFilename          string
	OnnxPath              string
	OnnxReader            io.ReadCloser
	InputsMeta            []InputOutputInfo
	OutputsMeta           []InputOutputInfo
	MaxPositionEmbeddings int
	IsGenerative          bool
}

func LoadModel

func LoadModel(path string, onnxFilename string, options *options.Options, isGenerative bool) (*Model, error)

type ORTModel

type ORTModel struct {
	Destroy           func() error
	GenerativeSession disabledGenerativeSession // placeholder when ORT disabled
}

type OutputInfo

type OutputInfo struct {
	Name       string
	Dimensions []int64
}

type Pipeline

type Pipeline interface {
	GetStatistics() PipelineStatistics         // Get the pipeline running statistics
	Validate() error                           // Validate the pipeline for correctness
	GetMetadata() PipelineMetadata             // Return metadata information for the pipeline
	GetModel() *Model                          // Return the model used by the pipeline
	IsGenerative() bool                        // Return whether the pipeline is generative
	Run([]string) (PipelineBatchOutput, error) // Run the pipeline on an input
}

Pipeline is the interface that any pipeline must implement.

type PipelineBatch

type PipelineBatch struct {
	InputValues       any
	DestroyInputs     func() error
	Input             []TokenizedInput
	PaddingMask       [][]bool
	OutputValues      []any
	Size              int
	MaxSequenceLength int
	MaxNewTokens      int
	// Multimodal support
	Images            any // Will hold *ortgenai.Images for generative models
	DestroyMultimodal func() error
}

PipelineBatch represents a batch of inputs that runs through the pipeline.

func NewBatch

func NewBatch(size int) *PipelineBatch

NewBatch initializes a new batch for inference.

func (*PipelineBatch) Destroy

func (b *PipelineBatch) Destroy() error

type PipelineBatchOutput

type PipelineBatchOutput interface {
	GetOutput() []any
}

type PipelineConfig

type PipelineConfig[T Pipeline] struct {
	ModelPath    string
	Name         string
	OnnxFilename string
	Options      []PipelineOption[T]
}

PipelineConfig is a configuration for a pipeline type that can be used to create that pipeline.

type PipelineMetadata

type PipelineMetadata struct {
	OutputsInfo []OutputInfo
}

type PipelineOption

type PipelineOption[T Pipeline] func(eo T) error

PipelineOption is an option for a pipeline type.

type PipelineStatistics

type PipelineStatistics struct {
	TokenizerTotalTime             time.Duration
	TokenizerExecutionCount        uint64
	TokenizerAvgQueryTime          time.Duration
	OnnxTotalTime                  time.Duration
	OnnxExecutionCount             uint64
	OnnxAvgQueryTime               time.Duration
	TotalQueries                   uint64
	TotalDocuments                 uint64
	AverageLatency                 time.Duration
	AverageBatchSize               float64
	FilteredResults                uint64
	AvgPrefillSeconds              float64
	TokensPerSecond                float64
	CumulativePrefillSum           float64
	CumulativePrefillCount         int
	CumulativeTokens               int
	CumulativeTokenDurationSeconds float64
}

func (*PipelineStatistics) ComputeOnnxStatistics

func (p *PipelineStatistics) ComputeOnnxStatistics(timings *timings)

func (*PipelineStatistics) ComputeTokenizerStatistics

func (p *PipelineStatistics) ComputeTokenizerStatistics(timings *timings)

func (*PipelineStatistics) Print

func (p *PipelineStatistics) Print()

type RustTokenizer

type RustTokenizer struct{}

type SequenceDelta added in v0.6.0

type SequenceDelta struct {
	Token string
	Index int
}

type Shape

type Shape []int64

func NewShape

func NewShape(dimensions ...int64) Shape

NewShape returns a Shape with the given dimensions.

func (Shape) String

func (s Shape) String() string

func (Shape) ValuesInt

func (s Shape) ValuesInt() []int

type TokenizedInput

type TokenizedInput struct {
	Raw               string
	Tokens            []string
	TokenIDs          []uint32
	TypeIDs           []uint32
	AttentionMask     []uint32
	SpecialTokensMask []uint32
	Offsets           [][2]uint
	WordIDs           []int
	MaxAttentionIndex int
}

TokenizedInput holds the result of running tokenizer on an input.

type Tokenizer

type Tokenizer struct {
	RustTokenizer    *RustTokenizer
	GoTokenizer      *GoTokenizer
	TokenizerTimings *timings
	Destroy          func() error
	Runtime          string
	MaxAllowedTokens int
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL