caching

package
v0.3.0 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 7, 2026 License: MIT Imports: 33 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
var ErrBatchClosed = fmt.Errorf("batch has been written or closed")
View Source
var ErrKeyEmpty = fmt.Errorf("key cannot be empty")
View Source
var ErrMemtableFull = fmt.Errorf("memtable full")
View Source
var ErrUnsafeOptions = fmt.Errorf("unsafe options require AllowUnsafe")
View Source
var ErrValueNil = fmt.Errorf("value cannot be nil")

Functions

func SetIteratorDebug

func SetIteratorDebug(enabled bool)

SetIteratorDebug toggles attaching debug metadata to iterators returned by CachingDB.Iterator. It is intended for benchmarking/diagnostics.

Types

type BackendDB

type BackendDB interface {
	Get(key []byte) ([]byte, error)
	GetUnsafe(key []byte) ([]byte, error)
	GetAppend(key, dst []byte) ([]byte, error)
	Has(key []byte) (bool, error)
	Iterator(start, end []byte) (iterator.UnsafeIterator, error)
	ReverseIterator(start, end []byte) (iterator.UnsafeIterator, error)
	NewBatch() batch.Interface
	Close() error
	Print() error
	Stats() map[string]string
}

BackendDB defines the subset of treedb.DB needed by CachingDB.

type Batch

type Batch struct {
	// contains filtered or unexported fields
}

func (*Batch) Close

func (b *Batch) Close() error

func (*Batch) Delete

func (b *Batch) Delete(key []byte) error

func (*Batch) DeleteView

func (b *Batch) DeleteView(key []byte) error

DeleteView records a Delete without copying key bytes. Callers must treat key as immutable until the batch is written or closed.

func (*Batch) GetByteSize

func (b *Batch) GetByteSize() (int, error)

func (*Batch) Replay

func (b *Batch) Replay(fn func(batch.Entry) error) error

func (*Batch) Reset

func (b *Batch) Reset()

Reset clears the batch for reuse without closing it.

This intentionally keeps internal buffers to avoid per-batch allocations in callers that frequently reset (e.g. geth benchmarks).

func (*Batch) Set

func (b *Batch) Set(key, value []byte) error

func (*Batch) SetOps

func (b *Batch) SetOps(ops []batch.Entry) error

func (*Batch) SetView

func (b *Batch) SetView(key, value []byte) error

SetView records a Put without copying key/value bytes. Callers must treat key/value as immutable until the batch is written or closed.

func (*Batch) Write

func (b *Batch) Write() error

func (*Batch) WriteSync

func (b *Batch) WriteSync() error

type DB

type DB struct {
	// contains filtered or unexported fields
}

func Open

func Open(dir string, backend BackendDB, opts Options) (*DB, error)

func (*DB) Checkpoint

func (db *DB) Checkpoint() error

Checkpoint forces a durable backend boundary and trims the WAL so long-running cached-mode runs do not accumulate unbounded `wal/` growth.

It blocks writers while it:

  • rotates the current mutable memtable (if non-empty),
  • rotates to a fresh WAL segment,
  • flushes all queued memtables with backend sync,
  • forces a backend sync boundary (even if the queue is empty),
  • removes all older WAL segments (keeping only the currently-open one).

func (*DB) Close

func (db *DB) Close() error

func (*DB) CompactionAssist

func (db *DB) CompactionAssist()

CompactionAssist performs bounded flush work when backpressure triggers. It is intended to be called by background maintenance (e.g. index compaction) so that flush debt does not grow unbounded in the absence of foreground writes.

func (*DB) Delete

func (db *DB) Delete(key []byte) error

func (*DB) DeleteRange

func (db *DB) DeleteRange(start, end []byte) error

DeleteRange deletes all keys in the range [start, end).

When WAL is disabled and the backend is empty, a full-range delete can be satisfied by clearing the in-memory layers without enumerating keys.

func (*DB) DeleteSync

func (db *DB) DeleteSync(key []byte) error

func (*DB) Drain

func (db *DB) Drain() error

Drain flushes all currently buffered writes (mutable + queued memtables) to the backend. It is intended for maintenance operations that require a fully materialized backend state (e.g. index vacuum).

Drain does not provide mutual exclusion against concurrent writers; callers should ensure no writes occur concurrently if they require a fully drained state.

func (*DB) Get

func (db *DB) Get(key []byte) ([]byte, error)

Get returns a safe copy of the value.

func (*DB) GetAppend

func (db *DB) GetAppend(key, dst []byte) ([]byte, error)

GetAppend appends the value for the key to dst and returns the new slice. If the key is not found, it returns dst and ErrKeyNotFound.

func (*DB) GetUnsafe

func (db *DB) GetUnsafe(key []byte) ([]byte, error)

GetUnsafe returns a safe copy of the value.

func (*DB) Has

func (db *DB) Has(key []byte) (bool, error)

func (*DB) Iterator

func (db *DB) Iterator(start, end []byte) (merging.Iterator, error)

Iterator implements DB.Iterator.

func (*DB) NewBatch

func (db *DB) NewBatch() *Batch

func (*DB) NewBatchWithSize

func (db *DB) NewBatchWithSize(size int) *Batch

func (*DB) Print

func (db *DB) Print() error

func (*DB) QueueBacklogBytes

func (db *DB) QueueBacklogBytes() int64

QueueBacklogBytes returns the current queued memtable backlog in bytes.

func (*DB) ReverseIterator

func (db *DB) ReverseIterator(start, end []byte) (merging.Iterator, error)

func (*DB) Set

func (db *DB) Set(key, value []byte) error

func (*DB) SetDictStore added in v0.2.0

func (db *DB) SetDictStore(store DictStore)

SetDictStore installs the dictionary store for current-ID freezing.

func (*DB) SetSync

func (db *DB) SetSync(key, value []byte) error

func (*DB) SetTemplateStore added in v0.3.0

func (db *DB) SetTemplateStore(store template.Store)

SetTemplateStore installs the template store used for template compression.

func (*DB) StartAutoCheckpoint

func (db *DB) StartAutoCheckpoint(interval time.Duration, maxWALBytes int64, idleInterval time.Duration)

StartAutoCheckpoint enables a background loop that periodically forces a durable boundary and trims cached-mode WAL segments. When idleInterval > 0, it also triggers an opportunistic checkpoint after a period of write-idleness.

interval > 0 enables periodic checkpoints. maxWALBytes is a safety cap: if > 0, the loop will attempt to checkpoint when the effective WAL bytes exceed this cap. maxWALBytes <= 0 disables the size trigger.

This does not make each individual write durable; it bounds the window of unsynced writes for long-running workloads.

func (*DB) Stats

func (db *DB) Stats() map[string]string

func (*DB) TriggerAutoCheckpoint

func (db *DB) TriggerAutoCheckpoint()

TriggerAutoCheckpoint schedules a best-effort immediate auto-checkpoint pass.

func (*DB) TriggerFlush

func (db *DB) TriggerFlush()

TriggerFlush schedules a background flush pass (best-effort).

type DictStore added in v0.2.0

type DictStore interface {
	GetCurrent(ctx context.Context) (uint64, error)
	GetDictBytes(ctx context.Context, dictID uint64) ([]byte, error)
}

DictStore provides access to the current dictionary ID for write freezing.

type Options

type Options struct {
	FlushThreshold int64

	// MemtableMode selects the in-memory write buffer implementation.
	// Supported: "skiplist", "hash_sorted", "btree", "adaptive".
	// Use "adaptive" or "adaptive:<mode>" to switch per-rotation based on workload.
	MemtableMode string

	// MemtableShards controls the number of mutable memtable shards. Values <= 0
	// use a default derived from GOMAXPROCS. The count is rounded down to a power
	// of two.
	MemtableShards int

	// Legacy backpressure knob: queue length limit.
	// 0 uses the default (4). <0 disables writer backpressure entirely.
	MaxQueuedMemtables int

	// Adaptive backpressure knobs (seconds/bytes). If any of these are non-zero,
	// the caching layer uses backlog-bytes thresholds instead of queue length.
	SlowdownBacklogSeconds float64
	StopBacklogSeconds     float64
	MaxBacklogBytes        int64

	// Writer flush assist limits when backpressure triggers.
	WriterFlushMaxMemtables int
	WriterFlushMaxDuration  time.Duration

	// FlushBuildConcurrency controls how many goroutines may be used to build a
	// combined flush batch from multiple immutable memtables. Values <= 1 disable
	// parallelism.
	FlushBuildConcurrency int
	// FlushBuildMinEntries gates the parallel build path by total entries.
	// Values <= 0 use a default of 16k.
	FlushBuildMinEntries int
	// FlushBuildMinUnits gates the parallel build path by number of queued units.
	// Values <= 0 use a default of 2.
	FlushBuildMinUnits int
	// FlushBuildChunkCap controls the maximum entries per build chunk.
	// Values < 0 use the fixed default of 8192, 0 enables adaptive chunk sizing,
	// and values > 0 set a fixed cap.
	FlushBuildChunkCap int
	// FlushBuildChunkTargetBytes controls adaptive chunk sizing (bytes per chunk).
	// Values <= 0 use a default of 2MiB.
	FlushBuildChunkTargetBytes int
	// FlushBuildChunkMinBytes clamps adaptive chunk sizes (minimum bytes).
	// Values <= 0 use a default of 1MiB.
	FlushBuildChunkMinBytes int
	// FlushBuildChunkMaxBytes clamps adaptive chunk sizes (maximum bytes).
	// Values <= 0 use a default of 4MiB.
	FlushBuildChunkMaxBytes int
	// FlushBuildPrefetchUnits controls how many memtables to start building ahead
	// of the consumer. Values <= 0 use FlushBuildConcurrency.
	FlushBuildPrefetchUnits int

	// FlushBackendMaxEntries caps how many operations are buffered into a single
	// backend batch before committing it and continuing with a fresh batch.
	//
	// This increases backend commit cadence during very large flushes, which can
	// reduce index.db high-watermark growth under small KeepRecent windows by
	// making retired pages eligible for reuse sooner.
	//
	// 0 uses the internal default. Negative disables chunking (single backend
	// commit per flush).
	FlushBackendMaxEntries int
	// FlushBackendMaxBatches caps how many intermediate backend commits a single
	// flush may emit. This bounds zipper/apply overhead when FlushBackendMaxEntries
	// is very small relative to the flush size.
	//
	// 0 uses the internal default. Negative disables the cap.
	FlushBackendMaxBatches int

	// DisableWAL disables the redo/journal log while keeping the value log enabled.
	DisableWAL bool
	// JournalLanes controls the number of active commit/value log lanes (0=default).
	// Max supported lanes is 255; value-log segment sequence per lane is capped at 8,388,607.
	JournalLanes int
	// WALMaxSegmentBytes caps the size of a single WAL segment payload.
	// 0 uses the default limit.
	WALMaxSegmentBytes int64
	// JournalCompression enables best-effort zstd compression for journal/commitlog
	// segments (metadata only). The writer only keeps compressed bytes when they
	// are smaller than the raw payload, so compression never causes size
	// amplification.
	JournalCompression bool
	// RelaxedSync disables fsync on Sync operations.
	RelaxedSync bool
	// ValueLogPointerThreshold controls when WAL/vlog pointers are used.
	// Values <= 0 use a default threshold. In relaxed durability modes, the
	// default is smaller to avoid catastrophic update-heavy cliffs at large key
	// counts by pushing moderate values into the value log.
	ValueLogPointerThreshold int
	// ValueLogRawWritevMinAvgBytes controls raw grouped-frame writev usage for
	// the value log.
	//
	// 0 uses adaptive mode (no average-bytes floor); values >0 require average
	// payload bytes/record to meet this floor before raw writev is considered.
	ValueLogRawWritevMinAvgBytes int
	// ValueLogRawWritevMinBatchRecords controls the minimum grouped records before
	// raw writev is considered for value-log appends.
	//
	// Values <= 0 use a default of 8.
	ValueLogRawWritevMinBatchRecords int
	// ValueLogCompression selects value-log compression behavior:
	// 0=default(unset; normalized to auto by TreeDB Open), 1=off, 2=block,
	// 3=dict, 4=auto.
	ValueLogCompression uint8
	// ValueLogBlockCodec selects block codec when block compression is enabled:
	// 0=snappy, 1=lz4.
	ValueLogBlockCodec uint8
	// ValueLogBlockTargetCompressedBytes controls block-mode grouped frame K
	// adaptation target (0=default).
	ValueLogBlockTargetCompressedBytes int
	// ValueLogIncompressibleHoldBytes configures auto-mode incompressible hold
	// window bytes (0=default).
	ValueLogIncompressibleHoldBytes int
	// ValueLogIncompressibleProbeBytes configures auto-mode hold probe interval
	// bytes (0=default).
	ValueLogIncompressibleProbeBytes int
	// ValueLogAutoPolicy controls auto-mode dict-vs-block bias:
	// 0=balanced, 1=throughput, 2=size.
	ValueLogAutoPolicy uint8
	// ValueLogMaxSegmentBytes caps the size of a single value-log segment file.
	// 0 disables the cap.
	//
	// This is an internal safety knob used by experimental index encodings
	// (e.g. packed on-disk ValuePtr) that require value-log offsets stay within a
	// smaller representable range.
	ValueLogMaxSegmentBytes int64
	// ForceValueLogPointers stores all values out-of-line in the value log.
	ForceValueLogPointers bool
	// DisableReadChecksum skips CRC verification on value-log reads.
	DisableReadChecksum bool
	// AllowUnsafe acknowledges unsafe durability options.
	// When false, Open will reject DisableWAL or RelaxedSync.
	AllowUnsafe bool
	// MaxValueLogRetainedBytes emits a warning when retained value-log bytes exceed
	// this threshold (0 disables warnings).
	MaxValueLogRetainedBytes int64
	// MaxValueLogRetainedBytesHard disables value-log pointers for new large
	// values once retained bytes exceed this threshold (0 disables the cap).
	MaxValueLogRetainedBytesHard int64

	// ValueLogDictTrain configures background dictionary training for value-log frame compression.
	// TrainBytes <= 0 disables training.
	ValueLogDictTrain compression.TrainConfig
	// ValueLogDictMaxK clamps the maximum group size (K) used for dict-compressed
	// value-log frames. Values <= 0 use the default (32).
	ValueLogDictMaxK int
	// ValueLogDictFrameEncodeLevel controls the zstd encoder level used for
	// dict-compressed value-log frames. Values <= 0 use SpeedFastest.
	ValueLogDictFrameEncodeLevel zstd.EncoderLevel
	// ValueLogDictFrameEnableEntropy enables entropy coding for dict-compressed
	// frames (higher ratio, lower throughput).
	ValueLogDictFrameEnableEntropy bool
	// ValueLogDictAdaptiveRatio enables adaptive pause of dict compression when payload ratios degrade.
	// 0 disables.
	ValueLogDictAdaptiveRatio float64
	// ValueLogDictMetricsWindowBytes controls the metrics window size (0=default).
	ValueLogDictMetricsWindowBytes int
	// ValueLogDictMetricsMinRecords is a minimum record count before pausing (0=default).
	ValueLogDictMetricsMinRecords int
	// ValueLogDictMetricsPauseBytes controls pause duration in bytes (0=default).
	ValueLogDictMetricsPauseBytes int
	// ValueLogDictIncompressibleHoldBytes enables classifier-driven hold mode for
	// high-entropy streams. While hold mode is active, dict compression attempts
	// and trainer collection are bypassed until hold bytes are consumed.
	//
	// 0 uses profile/default hold configuration; <0 explicitly disables hold
	// mode and opts out of profile defaults.
	ValueLogDictIncompressibleHoldBytes int
	// ValueLogDictProbeIntervalBytes controls periodic probe attempts while
	// incompressible hold mode is active.
	//
	// Values <= 0 use a default derived from hold bytes.
	ValueLogDictProbeIntervalBytes int
	// ValueLogDictMinPayloadSavingsRatio rejects newly trained dictionaries whose
	// payload ratio does not improve by at least this fraction (0 uses a
	// throughput-oriented default: 0.02 normally, 0.05 with force pointers or
	// WAL disabled).
	ValueLogDictMinPayloadSavingsRatio float64

	// ValueLogCompressionAutotune configures the wall-time value-log compression autotuner.
	// Cached mode only (value log enabled by default).
	ValueLogCompressionAutotune valuelog.AutotuneOptions

	// ValueLogTemplateMode controls template-based compression for value-log values.
	ValueLogTemplateMode template.Mode
	// ValueLogTemplateConfig controls template creation and encoding behavior.
	ValueLogTemplateConfig template.Config
	// ValueLogTemplateReadStrict controls strict template decode behavior.
	ValueLogTemplateReadStrict bool

	// NotifyError is an optional hook for background maintenance failures.
	NotifyError func(error)
}

type VlogAutotuneBenchMode added in v0.2.0

type VlogAutotuneBenchMode string

VlogAutotuneBenchMode controls the deterministic bench mode.

const (
	VlogAutotuneBenchOff         VlogAutotuneBenchMode = "off"
	VlogAutotuneBenchNoDictFixed VlogAutotuneBenchMode = "no_dict_fixed"
	VlogAutotuneBenchDictFixed   VlogAutotuneBenchMode = "dict_fixed"
	VlogAutotuneBenchAutotune    VlogAutotuneBenchMode = "autotune"
	VlogAutotuneBenchTemplate    VlogAutotuneBenchMode = "template_fixed"
)

type VlogAutotuneBenchRequest added in v0.2.0

type VlogAutotuneBenchRequest struct {
	Mode     VlogAutotuneBenchMode
	FixedK   int
	Segments []VlogAutotuneBenchSegment
}

type VlogAutotuneBenchResult added in v0.2.0

type VlogAutotuneBenchResult struct {
	Mode         VlogAutotuneBenchMode
	Segments     []VlogAutotuneBenchSegmentResult
	RawBytes     uint64
	StoredBytes  uint64
	WallTimeNs   int64
	ThroughputMB float64
	TrainerStats compression.TrainerStats
}

func RunVlogAutotuneBench added in v0.2.0

func RunVlogAutotuneBench(req VlogAutotuneBenchRequest) (*VlogAutotuneBenchResult, error)

type VlogAutotuneBenchSegment added in v0.2.0

type VlogAutotuneBenchSegment struct {
	Name               string
	Workload           valuelog.AutotuneWorkload
	ValueSize          int
	Records            int
	EncodeNsPerRawByte float64
	IoNsPerStoredByte  float64
}

type VlogAutotuneBenchSegmentResult added in v0.2.0

type VlogAutotuneBenchSegmentResult struct {
	Name               string
	RawBytes           uint64
	StoredBytes        uint64
	WallTimeNs         int64
	ThroughputRawMBps  float64
	AttemptedFrac      float64
	KeptFrac           float64
	ObservedRatio      float64
	FramesTotal        uint64
	FramesAttempted    uint64
	FramesKept         uint64
	EncodeNsTotal      int64
	IoNsTotal          int64
	State              string
	DictID             uint64
	DictHash           uint64
	HistoryBytes       int
	K                  int
	PublishOrderingOK  bool
	TrainerProfileOK   bool
	TrainerProfileK    int
	TrainerProfileHash uint64
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL