Documentation
¶
Index ¶
- type Attention
- type Cache
- type KVCache
- func (c *KVCache) Free()
- func (c *KVCache) Merge(parent, child Snapshot) Snapshot
- func (c *KVCache) Offset() int
- func (c *KVCache) Restore(snapshot Snapshot, target int) bool
- func (c *KVCache) Snapshot(fromOffset int) Snapshot
- func (c *KVCache) Split(snapshot Snapshot, at int) (Snapshot, Snapshot)
- func (c *KVCache) State() []*mlx.Array
- func (c *KVCache) Update(_ *batch.Batch, keys, values *mlx.Array) *nn.KVHistory
- type RecurrentCache
- func (c *RecurrentCache) Free()
- func (c *RecurrentCache) Get(b *batch.Batch, dtype mlx.DType) *nn.RecurrentHistory
- func (c *RecurrentCache) Merge(parent, child Snapshot) Snapshot
- func (c *RecurrentCache) Offset() int
- func (c *RecurrentCache) Put(b *batch.Batch, newConv, newDelta *mlx.Array)
- func (c *RecurrentCache) Restore(snapshot Snapshot, target int) bool
- func (c *RecurrentCache) Snapshot(fromOffset int) Snapshot
- func (c *RecurrentCache) Split(snapshot Snapshot, at int) (Snapshot, Snapshot)
- func (c *RecurrentCache) State() []*mlx.Array
- type RotatingKVCache
- func (c *RotatingKVCache) Free()
- func (c *RotatingKVCache) Merge(parent, child Snapshot) Snapshot
- func (c *RotatingKVCache) Restore(snapshot Snapshot, target int) bool
- func (c *RotatingKVCache) Snapshot(fromOffset int) Snapshot
- func (c *RotatingKVCache) Split(snapshot Snapshot, at int) (Snapshot, Snapshot)
- func (c *RotatingKVCache) State() []*mlx.Array
- func (c *RotatingKVCache) Update(b *batch.Batch, keys, values *mlx.Array) *nn.KVHistory
- type Snapshot
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type Attention ¶ added in v0.22.1
type Attention interface {
Cache
// Update appends (k, v) and returns an opaque nn.KVHistory for
// this layer's SDPA.
Update(b *batch.Batch, keys, values *mlx.Array) *nn.KVHistory
}
Attention is the contract for caches that back attention layers (KVCache, RotatingKVCache).
type Cache ¶
type Cache interface {
// State returns the cache-owned state roots that should be kept/evaluated.
State() []*mlx.Array
Free()
Offset() int
// Snapshot copies cache state from fromOffset to current offset into
// pinned VRAM arrays. The active cache is unchanged.
Snapshot(fromOffset int) Snapshot
// Restore brings the cache to target. If snapshot is nil, rewinds
// using the cache's own live state. Returns false if the target is
// unreachable (e.g. target > current offset, or negative).
Restore(snapshot Snapshot, target int) bool
// Merge combines two sequential snapshots [a,b) and [b,c) into [a,c).
// Takes ownership of both inputs.
Merge(parent, child Snapshot) Snapshot
// Split divides a snapshot [a,c) at offset b into [a,b) and [b,c).
// Takes ownership of the input. Cache types that cannot split
// (e.g. recurrent) return (nil, snapshot).
Split(snapshot Snapshot, at int) (parent, child Snapshot)
}
Cache is the common state-management interface shared by every cache kind. Write methods (such as Update and Put) live on the specific cache types.
type KVCache ¶
type KVCache struct {
// contains filtered or unexported fields
}
func NewKVCache ¶
func NewKVCache() *KVCache
type RecurrentCache ¶
type RecurrentCache struct {
// contains filtered or unexported fields
}
RecurrentCache stores state for linear-recurrent layers.
Conv state shape: [B, convTail, convDim]. Delta state shape: [B, numVHeads, headVDim, headKDim].
func NewRecurrentCache ¶
func NewRecurrentCache(convTail, convDim, numVHeads, headVDim, headKDim int32) *RecurrentCache
func (*RecurrentCache) Free ¶
func (c *RecurrentCache) Free()
func (*RecurrentCache) Get ¶ added in v0.22.1
func (c *RecurrentCache) Get(b *batch.Batch, dtype mlx.DType) *nn.RecurrentHistory
Get returns the current conv/delta state for the SSM layer's read phase. Lazy-initializes zero-filled state tensors using b.InputIDs for the batch size; reallocates if the existing state's batch size or dtype no longer matches.
func (*RecurrentCache) Merge ¶ added in v0.18.3
func (c *RecurrentCache) Merge(parent, child Snapshot) Snapshot
func (*RecurrentCache) Offset ¶
func (c *RecurrentCache) Offset() int
func (*RecurrentCache) Put ¶ added in v0.22.1
func (c *RecurrentCache) Put(b *batch.Batch, newConv, newDelta *mlx.Array)
Put stores the post-computation conv/delta states for the SSM layer's write phase and advances the cache offset by the current forward's real token count.
Assumes B = 1; heterogeneous batches are not supported.
func (*RecurrentCache) Restore ¶ added in v0.18.3
func (c *RecurrentCache) Restore(snapshot Snapshot, target int) bool
func (*RecurrentCache) Snapshot ¶ added in v0.18.3
func (c *RecurrentCache) Snapshot(fromOffset int) Snapshot
func (*RecurrentCache) Split ¶ added in v0.18.3
func (c *RecurrentCache) Split(snapshot Snapshot, at int) (Snapshot, Snapshot)
func (*RecurrentCache) State ¶
func (c *RecurrentCache) State() []*mlx.Array
type RotatingKVCache ¶
type RotatingKVCache struct {
*KVCache
// contains filtered or unexported fields
}
RotatingKVCache implements sliding-window attention with bounded memory.
func NewRotatingKVCache ¶
func NewRotatingKVCache(maxSize int) *RotatingKVCache
func (*RotatingKVCache) Free ¶ added in v0.18.3
func (c *RotatingKVCache) Free()
func (*RotatingKVCache) Merge ¶ added in v0.18.3
func (c *RotatingKVCache) Merge(parent, child Snapshot) Snapshot
func (*RotatingKVCache) Restore ¶ added in v0.18.3
func (c *RotatingKVCache) Restore(snapshot Snapshot, target int) bool
func (*RotatingKVCache) Snapshot ¶ added in v0.18.3
func (c *RotatingKVCache) Snapshot(fromOffset int) Snapshot
func (*RotatingKVCache) Split ¶ added in v0.18.3
func (c *RotatingKVCache) Split(snapshot Snapshot, at int) (Snapshot, Snapshot)
func (*RotatingKVCache) State ¶
func (c *RotatingKVCache) State() []*mlx.Array