Documentation
¶
Index ¶
- func AllInputTokens(pipeline *BasePipeline)
- func CreateImageTensors(batch *PipelineBatch, model *Model, preprocessed [][][][]float32, ...) error
- func CreateInputTensors(batch *PipelineBatch, model *Model, runtime string) error
- func CreateInputTensorsTraining(batch *PipelineBatch, model *Model, runtime string) error
- func CreateMessages(batch *PipelineBatch, p *BasePipeline, inputs any, systemPrompt string) error
- func CreateMessagesORT(_ *PipelineBatch, _ any, _ string) error
- func CreateModelBackend(model *Model, s *options.Options) error
- func CreateTabularTensors(batch *PipelineBatch, model *Model, features [][]float32, runtime string) error
- func Decode(tokens []uint32, tokenizer *Tokenizer, skipSpecialTokens bool) (string, error)
- func DetectImageTensorFormat(model *Model) (string, error)
- func GetNames(info []InputOutputInfo) []string
- func GetOnnxModelPath(model *Model) error
- func LoadTokenizer(model *Model, s *options.Options) error
- func PreprocessImages(format string, images []image.Image, preprocess []imageutil.PreprocessStep, ...) ([][][][]float32, error)
- func ReshapeOutput[T float32 | int64 | int32](input []T, meta InputOutputInfo, batchSize int, paddingMask [][]bool, ...) any
- func RunGenerativeSessionOnBatch(ctx context.Context, batch *PipelineBatch, p *BasePipeline, maxLength int, ...) (chan SequenceDelta, chan error, error)
- func RunSessionOnBatch(batch *PipelineBatch, p *BasePipeline) error
- func TokenizeInputs(batch *PipelineBatch, tk *Tokenizer, inputs []string)
- type BasePipeline
- type GoMLXModel
- type GoTokenizer
- type InputOutputInfo
- type Message
- type Model
- type ORTModel
- type OutputInfo
- type Pipeline
- type PipelineBatch
- type PipelineBatchOutput
- type PipelineConfig
- type PipelineMetadata
- type PipelineOption
- type PipelineStatistics
- type RustTokenizer
- type SequenceDelta
- type Shape
- type TokenizedInput
- type Tokenizer
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func AllInputTokens ¶
func AllInputTokens(pipeline *BasePipeline)
func CreateImageTensors ¶
func CreateImageTensors(batch *PipelineBatch, model *Model, preprocessed [][][][]float32, runtime string) error
func CreateInputTensors ¶
func CreateInputTensors(batch *PipelineBatch, model *Model, runtime string) error
func CreateInputTensorsTraining ¶
func CreateInputTensorsTraining(batch *PipelineBatch, model *Model, runtime string) error
CreateInputTensorsTraining creates input tensors for training. Same as CreateInputTensors but we never pad the batch size as we expect regular batch sizes from the dataset.
func CreateMessages ¶ added in v0.6.0
func CreateMessages(batch *PipelineBatch, p *BasePipeline, inputs any, systemPrompt string) error
func CreateMessagesORT ¶ added in v0.6.0
func CreateMessagesORT(_ *PipelineBatch, _ any, _ string) error
func CreateTabularTensors ¶ added in v0.6.2
func CreateTabularTensors(batch *PipelineBatch, model *Model, features [][]float32, runtime string) error
CreateTabularTensors builds input tensors for classic ML/tabular models.
func Decode ¶
func Decode(tokens []uint32, tokenizer *Tokenizer, skipSpecialTokens bool) (string, error)
func DetectImageTensorFormat ¶ added in v0.6.0
func DetectImageTensorFormat(model *Model) (string, error)
DetectImageTensorFormat inspects the first image-like input and infers NHWC or NCHW.
func GetNames ¶
func GetNames(info []InputOutputInfo) []string
func GetOnnxModelPath ¶ added in v0.6.2
func GetOnnxModelPath(model *Model) error
func PreprocessImages ¶ added in v0.6.0
func PreprocessImages(format string, images []image.Image, preprocess []imageutil.PreprocessStep, normalize []imageutil.NormalizationStep) ([][][][]float32, error)
PreprocessImages preprocesses images into a 4D tensor slice according to format and steps.
func ReshapeOutput ¶
func RunGenerativeSessionOnBatch ¶
func RunGenerativeSessionOnBatch(ctx context.Context, batch *PipelineBatch, p *BasePipeline, maxLength int, stopSequences []string, temperature *float64, topP *float64, seed *int) (chan SequenceDelta, chan error, error)
func RunSessionOnBatch ¶
func RunSessionOnBatch(batch *PipelineBatch, p *BasePipeline) error
func TokenizeInputs ¶
func TokenizeInputs(batch *PipelineBatch, tk *Tokenizer, inputs []string)
Types ¶
type BasePipeline ¶
type BasePipeline struct {
Model *Model
PipelineTimings *timings
PipelineName string
Runtime string
}
BasePipeline can be embedded by a pipeline.
func NewBasePipeline ¶
func NewBasePipeline[T Pipeline](config PipelineConfig[T], s *options.Options, model *Model) (*BasePipeline, error)
type GoMLXModel ¶
type GoMLXModel struct {
Backend backends.Backend
OnnxModel onnx.Model
Ctx *context.Context // ctx with the model's weights.
Exec *context.Exec // exec is used to execute the model with a context.
Call func(ctx *context.Context, inputs []*graph.Node) []*graph.Node
Destroy func()
BatchBuckets []int // BatchBuckets defines bucket sizes for batch dimension padding.
SequenceBuckets []int // SequenceBuckets defines bucket sizes for sequence length padding.
MaxCache int // MaxCache sets the maximum number of unique input shapes to cache.
}
type GoTokenizer ¶
type InputOutputInfo ¶
type Message ¶ added in v0.6.0
type Message struct {
Role string `json:"role"`
Content string `json:"content"`
ImageURLs []string `json:"image_urls,omitempty"` // File paths or data URIs for multimodal support
}
Message represents a single message in a conversation. Images can be included via ImageURLs for multimodal models.
type Model ¶
type Model struct {
ID string
ORTModel *ORTModel
GoMLXModel *GoMLXModel
Tokenizer *Tokenizer
Destroy func() error
Pipelines map[string]Pipeline
IDLabelMap map[int]string
SeparatorToken string
Path string
OnnxFilename string
OnnxPath string
OnnxReader io.ReadCloser
InputsMeta []InputOutputInfo
OutputsMeta []InputOutputInfo
MaxPositionEmbeddings int
IsGenerative bool
}
type ORTModel ¶
type ORTModel struct {
Destroy func() error
GenerativeSession disabledGenerativeSession // placeholder when ORT disabled
}
type OutputInfo ¶
type Pipeline ¶
type Pipeline interface {
GetStatistics() PipelineStatistics // Get the pipeline running statistics
Validate() error // Validate the pipeline for correctness
GetMetadata() PipelineMetadata // Return metadata information for the pipeline
GetModel() *Model // Return the model used by the pipeline
IsGenerative() bool // Return whether the pipeline is generative
Run([]string) (PipelineBatchOutput, error) // Run the pipeline on an input
}
Pipeline is the interface that any pipeline must implement.
type PipelineBatch ¶
type PipelineBatch struct {
InputValues any
DestroyInputs func() error
Input []TokenizedInput
PaddingMask [][]bool
OutputValues []any
Size int
MaxSequenceLength int
MaxNewTokens int
// Multimodal support
Images any // Will hold *ortgenai.Images for generative models
DestroyMultimodal func() error
}
PipelineBatch represents a batch of inputs that runs through the pipeline.
func NewBatch ¶
func NewBatch(size int) *PipelineBatch
NewBatch initializes a new batch for inference.
func (*PipelineBatch) Destroy ¶
func (b *PipelineBatch) Destroy() error
type PipelineBatchOutput ¶
type PipelineBatchOutput interface {
GetOutput() []any
}
type PipelineConfig ¶
type PipelineConfig[T Pipeline] struct {
ModelPath string
Name string
OnnxFilename string
Options []PipelineOption[T]
}
PipelineConfig is a configuration for a pipeline type that can be used to create that pipeline.
type PipelineMetadata ¶
type PipelineMetadata struct {
OutputsInfo []OutputInfo
}
type PipelineOption ¶
PipelineOption is an option for a pipeline type.
type PipelineStatistics ¶
type PipelineStatistics struct {
TokenizerTotalTime time.Duration
TokenizerExecutionCount uint64
TokenizerAvgQueryTime time.Duration
OnnxTotalTime time.Duration
OnnxExecutionCount uint64
OnnxAvgQueryTime time.Duration
TotalQueries uint64
TotalDocuments uint64
AverageLatency time.Duration
AverageBatchSize float64
FilteredResults uint64
AvgPrefillSeconds float64
TokensPerSecond float64
CumulativePrefillSum float64
CumulativePrefillCount int
CumulativeTokens int
CumulativeTokenDurationSeconds float64
}
func (*PipelineStatistics) ComputeOnnxStatistics ¶
func (p *PipelineStatistics) ComputeOnnxStatistics(timings *timings)
func (*PipelineStatistics) ComputeTokenizerStatistics ¶
func (p *PipelineStatistics) ComputeTokenizerStatistics(timings *timings)
func (*PipelineStatistics) Print ¶
func (p *PipelineStatistics) Print()
type RustTokenizer ¶
type RustTokenizer struct{}
type SequenceDelta ¶ added in v0.6.0
type Shape ¶
type Shape []int64
type TokenizedInput ¶
type TokenizedInput struct {
Raw string
Tokens []string
TokenIDs []uint32
TypeIDs []uint32
AttentionMask []uint32
SpecialTokensMask []uint32
Offsets [][2]uint
WordIDs []int
MaxAttentionIndex int
}
TokenizedInput holds the result of running tokenizer on an input.
type Tokenizer ¶
type Tokenizer struct {
RustTokenizer *RustTokenizer
GoTokenizer *GoTokenizer
TokenizerTimings *timings
Destroy func() error
Runtime string
MaxAllowedTokens int
}