Documentation
¶
Index ¶
- Variables
- type Cache
- type Causal
- func (c *Causal) Close()
- func (c *Causal) CopyPrefix(srcSeq, dstSeq int, len int32)
- func (c *Causal) Get(ctx ml.Context) (ml.Tensor, ml.Tensor, ml.Tensor)
- func (c *Causal) Init(backend ml.Backend, dtype ml.DType, capacity int32)
- func (c *Causal) Put(ctx ml.Context, key, value ml.Tensor)
- func (c *Causal) Remove(seq int, beginIndex, endIndex int32) error
- func (c *Causal) SetLayer(layer int)
- func (c *Causal) StartForward(ctx ml.Context, positions []int32, seqs []int) error
- type EncoderCache
- func (c *EncoderCache) Close()
- func (c *EncoderCache) CopyPrefix(srcSeq, dstSeq int, len int32)
- func (c *EncoderCache) EncoderCached() bool
- func (c *EncoderCache) Get(ctx ml.Context) (ml.Tensor, ml.Tensor, ml.Tensor)
- func (c *EncoderCache) Init(backend ml.Backend, dtype ml.DType, capacity int32)
- func (c *EncoderCache) Put(ctx ml.Context, key, value ml.Tensor)
- func (c *EncoderCache) Remove(seq int, beginIndex, endIndex int32) error
- func (c *EncoderCache) SetLayer(layer int)
- func (c *EncoderCache) StartForward(ctx ml.Context, positions []int32, seqs []int) error
- type WrapperCache
- func (c *WrapperCache) Close()
- func (c *WrapperCache) CopyPrefix(srcSeq, dstSeq int, len int32)
- func (c *WrapperCache) Get(ctx ml.Context) (ml.Tensor, ml.Tensor, ml.Tensor)
- func (c *WrapperCache) Init(backend ml.Backend, dtype ml.DType, capacity int32)
- func (c *WrapperCache) Put(ctx ml.Context, key, value ml.Tensor)
- func (c *WrapperCache) Remove(seq int, beginIndex, endIndex int32) error
- func (c *WrapperCache) SetLayer(layer int)
- func (c *WrapperCache) SetLayerType(layerType int)
- func (c *WrapperCache) StartForward(ctx ml.Context, positions []int32, seqs []int) error
- func (c *WrapperCache) UnderlyingCache() Cache
Constants ¶
This section is empty.
Variables ¶
View Source
var (
	ErrKvCacheFull  = errors.New("could not find a kv cache slot")
	ErrNotSupported = errors.New("model does not support operation")
)
Functions ¶
This section is empty.
Types ¶
type Cache ¶
type Cache interface {
// SetLayer sets the active layer of the cache
SetLayer(layer int)
// Get returns the history of key and value tensors plus a mask
//
// The shape of the tensors is documented in the specific
// cache implementation used.
Get(ctx ml.Context) (ml.Tensor, ml.Tensor, ml.Tensor)
// Put stores a batch of key and value in the cache
//
// The shape of the tensors is documented in the specific
// cache implementation used.
Put(ctx ml.Context, key, value ml.Tensor)
// Init sets up runtime parameters
Init(backend ml.Backend, dtype ml.DType, capacity int32)
// Close closes the cache and frees resources associated with it
Close()
// StartForward is called before the start of the model's forward pass.
// For each token in the coming batch, there must be a corresponding
// entry in positions and seqs.
StartForward(ctx ml.Context, positions []int32, seqs []int) error
// CopyPrefix copies tokens in the range [0, len) from srcSeq to dstSeq
CopyPrefix(srcSeq, dstSeq int, len int32)
// Remove deletes tokens in the range [beginIndex, endIndex) from seq. Set
// endIndex to math.MaxInt32 to remove everything starting at beginIndex.
//
// If an error occurs, the entire context for the sequence should be
// removed by calling Remove(seq, 0, math.MaxInt32)
Remove(seq int, beginIndex, endIndex int32) error
}
type Causal ¶
Causal cache stores K and V tensors according to their position in the sequence. It returns the history and a mask for attending to past tokens.
The tensors are of shape (embed dim, kv heads, batch size). The mask is of shape (history size, batch size).
func NewCausalCache ¶
func NewCausalCache(shift shiftFn) *Causal
func NewSWACache ¶
func (*Causal) CopyPrefix ¶
type EncoderCache ¶
type EncoderCache struct {
// contains filtered or unexported fields
}
Encoder cache stores K and V tensors that are position-independent.
The tensors can be of any shape and will be returned as they were stored. The mask is currently always nil.
Not currently safe for multiple sequences.
func NewEncoderCache ¶
func NewEncoderCache() *EncoderCache
func (*EncoderCache) Close ¶
func (c *EncoderCache) Close()
func (*EncoderCache) CopyPrefix ¶
func (c *EncoderCache) CopyPrefix(srcSeq, dstSeq int, len int32)
func (*EncoderCache) EncoderCached ¶
func (c *EncoderCache) EncoderCached() bool
func (*EncoderCache) Remove ¶
func (c *EncoderCache) Remove(seq int, beginIndex, endIndex int32) error
func (*EncoderCache) SetLayer ¶
func (c *EncoderCache) SetLayer(layer int)
func (*EncoderCache) StartForward ¶
type WrapperCache ¶
type WrapperCache struct {
// contains filtered or unexported fields
}
Wrapper cache is a container for multiple types of caches, such as for the encoding and decoding portions of a model.
func NewWrapperCache ¶
func NewWrapperCache(caches ...Cache) *WrapperCache
func (*WrapperCache) Close ¶
func (c *WrapperCache) Close()
func (*WrapperCache) CopyPrefix ¶
func (c *WrapperCache) CopyPrefix(srcSeq, dstSeq int, len int32)
func (*WrapperCache) Remove ¶
func (c *WrapperCache) Remove(seq int, beginIndex, endIndex int32) error
func (*WrapperCache) SetLayer ¶
func (c *WrapperCache) SetLayer(layer int)
func (*WrapperCache) SetLayerType ¶
func (c *WrapperCache) SetLayerType(layerType int)
func (*WrapperCache) StartForward ¶
func (*WrapperCache) UnderlyingCache ¶
func (c *WrapperCache) UnderlyingCache() Cache
Click to show internal directories.
Click to hide internal directories.