Documentation
¶
Index ¶
- Constants
- Variables
- func VacuumIndexOffline(opts Options) error
- func ValidateFragmentationReport(rep map[string]string) error
- func ValueReaderForState(state *DBState) tree.SlabReader
- type Batch
- func (b *Batch) Close() error
- func (b *Batch) Delete(key []byte) error
- func (b *Batch) DeleteView(key []byte) error
- func (b *Batch) GetByteSize() (int, error)
- func (b *Batch) Replay(fn func(batch.Entry) error) error
- func (b *Batch) Reset()
- func (b *Batch) Set(key, value []byte) error
- func (b *Batch) SetOps(ops []batch.Entry) error
- func (b *Batch) SetPointer(key []byte, ptr page.ValuePtr) error
- func (b *Batch) SetPointerView(key []byte, ptr page.ValuePtr) error
- func (b *Batch) SetView(key, value []byte) error
- func (b *Batch) Write() error
- func (b *Batch) WriteSync() error
- type DB
- func (db *DB) AcquireSnapshot() *Snapshot
- func (db *DB) Close() error
- func (db *DB) Commit(newRootID uint64) error
- func (db *DB) CompactIndex() error
- func (db *DB) Delete(key []byte) error
- func (db *DB) DeleteSync(key []byte) error
- func (db *DB) Dir() string
- func (db *DB) FragmentationReport() (map[string]string, error)
- func (db *DB) Get(key []byte) ([]byte, error)
- func (db *DB) GetAppend(key, dst []byte) ([]byte, error)
- func (db *DB) GetUnsafe(key []byte) ([]byte, error)
- func (db *DB) Has(key []byte) (bool, error)
- func (db *DB) InlineThreshold() int
- func (db *DB) Iterator(start, end []byte) (iterator.UnsafeIterator, error)
- func (db *DB) MarkValueLogZombie(id uint32) error
- func (db *DB) NewBatch() batch.Interface
- func (db *DB) NewBatchWithSize(size int) batch.Interface
- func (db *DB) Pager() *pager.Pager
- func (db *DB) Print() error
- func (db *DB) Prune()
- func (db *DB) RefreshValueLogSet() error
- func (db *DB) ReverseIterator(start, end []byte) (iterator.UnsafeIterator, error)
- func (db *DB) Set(key, value []byte) error
- func (db *DB) SetSync(key, value []byte) error
- func (db *DB) State() *DBState
- func (db *DB) Stats() map[string]string
- func (db *DB) VacuumIndexOnline(ctx context.Context) error
- func (db *DB) ValueLogGC(ctx context.Context, opts ValueLogGCOptions) (ValueLogGCStats, error)
- func (db *DB) Zipper() *zipper.Zipper
- type DBIterator
- func (it *DBIterator) Close() error
- func (it *DBIterator) DebugStats() (queueLen int, sourcesUsed int)
- func (it *DBIterator) Domain() (start, end []byte)
- func (it *DBIterator) Error() error
- func (it *DBIterator) IsDeleted() bool
- func (it *DBIterator) Key() []byte
- func (it *DBIterator) KeyCopy(dst []byte) []byte
- func (it *DBIterator) Next()
- func (it *DBIterator) Seek(key []byte)
- func (it *DBIterator) UnsafeEntry() ([]byte, page.ValuePtr, byte)
- func (it *DBIterator) UnsafeKey() []byte
- func (it *DBIterator) UnsafeValue() []byte
- func (it *DBIterator) Valid() bool
- func (it *DBIterator) Value() []byte
- func (it *DBIterator) ValueCopy(dst []byte) []byte
- type DBState
- type DurabilityMode
- type IntegrityMode
- type Iterator
- type Options
- type Snapshot
- func (s *Snapshot) Close() error
- func (s *Snapshot) Get(key []byte) ([]byte, error)
- func (s *Snapshot) GetEntry(key []byte) (node.LeafEntry, error)
- func (s *Snapshot) GetUnsafe(key []byte) ([]byte, error)
- func (s *Snapshot) Has(key []byte) (bool, error)
- func (s *Snapshot) Pager() *pager.Pager
- func (s *Snapshot) State() *DBState
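- type SnapshotPool
- func NewSnapshotPool() *SnapshotPool
- func (p *SnapshotPool) Get() *Snapshot
- func (p *SnapshotPool) Put(s *Snapshot)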
- type ValueLogAutoPolicy
- type ValueLogBlockCodec
- type ValueLogCompressionMode
- type ValueLogGCOptions
- type ValueLogGCStats
- type ValueLogOptions
- type ValueLogRewriteStats
- func ValueLogRewriteOffline(opts Options) (ValueLogRewriteStats, error)
- type WritePolicy
- func DefaultWritePolicy() WritePolicy
Constants ¶
const (
	MetaPage0ID = 0
	MetaPage1ID = 1
	KeepRecent  = 10000
)
Variables ¶
var (
	// ErrLocked indicates the database directory is already opened by another process.
	ErrLocked = lockfile.ErrLocked
	// ErrReadOnly indicates a write was attempted on a read-only DB handle.
	ErrReadOnly = errors.New("treedb: read-only")
)
var ErrVacuumInProgress = errors.New("online vacuum already in progress")
var ErrVacuumUnsupported = errors.New("online vacuum unsupported on this platform")
Functions ¶
func VacuumIndexOffline ¶
VacuumIndexOffline rewrites index.db into a fresh file and swaps it in.
This is intended to reclaim space (reduce `index.db` chunk count) and restore locality after long churn. It is an offline operation (requires exclusive open lock).
func ValidateFragmentationReport ¶
ValidateFragmentationReport validates basic invariants on a FragmentationReport output map. It is intended for tests and operational "health" tooling.
func ValueReaderForState ¶
func ValueReaderForState(state *DBState) tree.SlabReader
ValueReaderForState returns a reader that resolves value-log pointers.
Types ¶
type Batch ¶
type Batch struct {
// contains filtered or unexported fields
}
Batch implements the cosmos-db Batch interface.
func (*Batch) DeleteView ¶
DeleteView records a Delete without copying the key bytes. Callers must treat key as immutable until the batch is written or closed.
func (*Batch) GetByteSize ¶
func (*Batch) SetPointer ¶
SetPointer records a pointer without copying the value bytes.
func (*Batch) SetPointerView ¶ added in v0.2.0
SetPointerView records a pointer without copying the key bytes.
func (*Batch) SetView ¶
SetView records a Put without copying key/value bytes. Callers must treat key/value as immutable until the batch is written or closed.
This is intentionally not part of the public batch.Interface; it is a best-effort optimization used by higher-level layers (e.g. cached streaming).
type DB ¶
type DB struct {
// contains filtered or unexported fields
}
func (*DB) AcquireSnapshot ¶
AcquireSnapshot returns a new snapshot.
func (*DB) Commit ¶
Commit persists the new root (Sync=true by default). It is usually called internally by Batch.Write, but it may also be called externally by code that manages the root manually. An external caller may not know which pages were retired, and the signature takes only the new root ID, so a manual Commit is treated as having no retired-page list (nil).
func (*DB) CompactIndex ¶
CompactIndex rewrites the entire B-Tree sequentially to the end of the file. This improves Full Scan performance by restoring physical locality. Note: This operation causes file growth as old pages are not immediately reclaimed (they are leaked to the freelist but not reused during this append-only build).
func (*DB) DeleteSync ¶
DeleteSync removes a key and syncs.
func (*DB) FragmentationReport ¶
FragmentationReport returns best-effort structural stats about the user index that help diagnose scan regressions after churn.
func (*DB) GetAppend ¶
GetAppend appends the value for the key to dst and returns the new slice. If the key is not found, it returns dst and ErrKeyNotFound.
func (*DB) GetUnsafe ¶
GetUnsafe returns the value for a key.
Semantics: despite the name, DB.GetUnsafe returns a safe copy of the value. For zero-copy views tied to a snapshot lifetime, use Snapshot.GetUnsafe.
func (*DB) InlineThreshold ¶
func (*DB) Iterator ¶
func (db *DB) Iterator(start, end []byte) (iterator.UnsafeIterator, error)
Iterator returns an iterator.
func (*DB) MarkValueLogZombie ¶
MarkValueLogZombie marks a value-log segment as zombie so it can be removed once all snapshots release it.
func (*DB) RefreshValueLogSet ¶ added in v0.3.0
RefreshValueLogSet publishes a new DBState with the current value-log set (excluding zombies) without creating a new commit.
func (*DB) ReverseIterator ¶
func (db *DB) ReverseIterator(start, end []byte) (iterator.UnsafeIterator, error)
ReverseIterator returns a reverse iterator.
func (*DB) VacuumIndexOnline ¶
VacuumIndexOnline rebuilds the index into a new file and swaps it in with a short writer pause. Old snapshots remain valid by pinning the previous index generation until readers drain; disk space is reclaimed once the old mmap is closed.
func (*DB) ValueLogGC ¶ added in v0.3.0
func (db *DB) ValueLogGC(ctx context.Context, opts ValueLogGCOptions) (ValueLogGCStats, error)
ValueLogGC deletes fully-unreferenced value-log segments.
It scans the user + system trees for value-log pointers, computes referenced segments, and removes segments that are:
- not referenced,
- not the currently-active segment per lane,
- and not pinned by active snapshots.
type DBIterator ¶
type DBIterator struct {
// contains filtered or unexported fields
}
DBIterator wraps tree.Iterator and holds a Snapshot.
func (*DBIterator) Close ¶
func (it *DBIterator) Close() error
func (*DBIterator) DebugStats ¶
func (it *DBIterator) DebugStats() (queueLen int, sourcesUsed int)
func (*DBIterator) Domain ¶
func (it *DBIterator) Domain() (start, end []byte)
func (*DBIterator) Error ¶
func (it *DBIterator) Error() error
func (*DBIterator) IsDeleted ¶
func (it *DBIterator) IsDeleted() bool
func (*DBIterator) Key ¶
func (it *DBIterator) Key() []byte
func (*DBIterator) KeyCopy ¶
func (it *DBIterator) KeyCopy(dst []byte) []byte
func (*DBIterator) Next ¶
func (it *DBIterator) Next()
func (*DBIterator) UnsafeEntry ¶
func (it *DBIterator) UnsafeEntry() ([]byte, page.ValuePtr, byte)
func (*DBIterator) UnsafeKey ¶
func (it *DBIterator) UnsafeKey() []byte
func (*DBIterator) UnsafeValue ¶
func (it *DBIterator) UnsafeValue() []byte
func (*DBIterator) Valid ¶
func (it *DBIterator) Valid() bool
func (*DBIterator) Value ¶
func (it *DBIterator) Value() []byte
func (*DBIterator) ValueCopy ¶
func (it *DBIterator) ValueCopy(dst []byte) []byte
type DurabilityMode ¶ added in v0.3.0
type DurabilityMode uint8
DurabilityMode configures cached-mode durability semantics.
These modes are explicit and intentionally replace the previous boolean combination of DisableWAL + RelaxedSync + AllowUnsafe.
const (
	// DurabilityDurable enables WAL (journal) and uses fsync for sync operations.
	DurabilityDurable DurabilityMode = iota
	// DurabilityWALOnRelaxed keeps WAL enabled but disables fsync (crash-consistent).
	DurabilityWALOnRelaxed
	// DurabilityWALOffRelaxed disables WAL and fsync (unsafe; recent writes may be lost).
	DurabilityWALOffRelaxed
)
type IntegrityMode ¶ added in v0.3.0
type IntegrityMode uint8
IntegrityMode configures value-log read integrity checks.
It intentionally replaces the previous DisableReadChecksum boolean.
const (
	// IntegrityVerify enables checksum verification on value-log reads.
	IntegrityVerify IntegrityMode = iota
	// IntegritySkipChecksums disables checksum verification on value-log reads (unsafe).
	IntegritySkipChecksums
)
type Iterator ¶
type Iterator interface {
Valid() bool
Next()
Key() []byte
Value() []byte
KeyCopy(dst []byte) []byte
ValueCopy(dst []byte) []byte
Close() error
Error() error
// Reset resets the iterator for reuse.
Reset(start, end []byte)
}
Iterator is the internal interface for iteration.
type Options ¶
type Options struct {
Dir string
// ReadOnly opens the database without acquiring an exclusive lock and without
// modifying on-disk state (no recovery truncation, no WAL replay, no background
// maintenance). Only read operations are supported.
ReadOnly bool
ChunkSize int64 // Default 16MiB
// DictDBChunkSize controls the mmap chunk size used for the `dictdb/` side
// store when TreeDB is opened via the public `treedb.Open` wrapper.
//
// It is intentionally independent of ChunkSize so benchmarks and callers can
// tune the main index pager without inflating dictdb disk usage.
//
// Values <= 0 use a default of 1MiB.
DictDBChunkSize int64
// TemplateDBChunkSize controls the mmap chunk size used for the `templatedb/`
// side store when template compression is enabled.
//
// Values <= 0 use a default of 1MiB.
TemplateDBChunkSize int64
KeepRecent uint64 // Default 10000
// PagerSyncConcurrency controls how many goroutines may msync dirty chunks
// in parallel during Sync. Values <= 0 use the default (1).
PagerSyncConcurrency int
// PagerMmapPopulate enables MAP_POPULATE on Linux when mmapping index.db
// chunks. This can reduce minor-fault overhead under random access patterns
// at the cost of increased work at map/grow time.
PagerMmapPopulate bool
// PagerPrefetchOnRead enables best-effort prefetch hints (madvise WILLNEED)
// for mmapped index chunks (Linux only). When enabled, TreeDB may issue
// prefetch requests opportunistically (e.g. before rewriting child pages
// during checkpoint/merge). It is a no-op on unsupported platforms.
PagerPrefetchOnRead bool
// Durability configures cached-mode durability semantics.
//
// The default (zero) is DurabilityDurable.
Durability DurabilityMode
// DisableBackgroundPrune keeps pruning on the commit critical path (legacy
// behavior). When false (default), a bounded background pruner frees pages
// asynchronously to reduce commit latency under churn.
DisableBackgroundPrune bool
// PruneInterval controls how often the background pruner wakes up (0 uses a
// default).
PruneInterval time.Duration
// PruneMaxPages bounds how many pages are freed per pruner tick (0 uses a
// default; <0 means unlimited).
PruneMaxPages int
// PruneMaxDuration bounds how long a pruner tick may run (0 uses a default;
// <0 means unlimited).
PruneMaxDuration time.Duration
FlushThreshold int64
// MemtableMode selects the cached-mode memtable implementation.
// Supported values: "skiplist", "hash_sorted", "btree", "adaptive".
MemtableMode string
// MemtableShards controls the number of mutable memtable shards in cached
// mode. Values <= 0 use a runtime-dependent default.
MemtableShards int
// PreferAppendAlloc makes the page allocator ignore the freelist and append
// new pages instead. This can improve scan locality under churn at the cost
// of file growth (space is reclaimed later via vacuum).
PreferAppendAlloc bool
// FreelistRegionPages and FreelistRegionRadius bias freelist reuse toward
// nearby page regions to improve locality. Leave both at 0 to disable the
// bias (default). If either is set, missing values will use defaults.
// Set FreelistRegionRadius < 0 to force-disable the bias.
FreelistRegionPages uint64
FreelistRegionRadius int
// LeafFillTargetPPM and InternalFillTargetPPM control how full newly-written
// B+Tree pages are allowed to become before forcing a split (soft-full).
// Lower values reduce split churn and slow re-fragmentation under updates, at
// the cost of higher page count (more index bytes).
//
// Values are in parts-per-million where 1_000_000 means "allow full pages"
// (current behavior). Zero uses the default (1_000_000).
LeafFillTargetPPM uint32
InternalFillTargetPPM uint32
// MaintenanceOpsPerCoalesce controls the maintenance budget during zipper
// merge. It bounds coalesce work to roughly len(ops)/K operations per batch.
// 0 uses the default; negative disables the budget (full maintenance).
MaintenanceOpsPerCoalesce int
// LeafPrefixCompression enables prefix-compressed leaf nodes for new pages.
LeafPrefixCompression bool
// IndexColumnarLeaves enables the experimental columnar leaf encoding for new pages.
IndexColumnarLeaves bool
// IndexPackedValuePtr enables the experimental packed 12-byte ValuePtr encoding
// for pointer entries in new leaf pages.
//
// Packed pointers store ValuePtr.Offset as u32 on disk. Callers must ensure
// value-log segments are rotated such that offsets remain representable.
IndexPackedValuePtr bool
// IndexInternalBaseDelta enables the experimental internal-node base-delta encoding.
IndexInternalBaseDelta bool
// MaxQueuedMemtables controls how much immutable-memtable backlog the cached
// layer will allow before applying backpressure (i.e. forcing flush work on
// writers). A negative value disables backpressure entirely (higher short-term
// ingest, but potentially unbounded flush debt). Zero uses the default.
MaxQueuedMemtables int
// SlowdownBacklogSeconds begins applying writer backpressure when queued flush
// backlog exceeds this many seconds of estimated flush work (0 disables).
SlowdownBacklogSeconds float64
// StopBacklogSeconds blocks writers when queued flush backlog exceeds this many
// seconds of estimated flush work (0 disables).
StopBacklogSeconds float64
// MaxBacklogBytes is an absolute cap on queued flush backlog bytes (0 disables).
MaxBacklogBytes int64
// WriterFlushMaxMemtables bounds how much queued work a writer will help flush
// per write when backpressure is active (0 uses a default).
WriterFlushMaxMemtables int
// WriterFlushMaxDuration bounds how long a writer will spend helping flush per
// write when backpressure is active (0 disables the time bound).
WriterFlushMaxDuration time.Duration
// FlushBuildConcurrency controls how many goroutines may be used to build a
// combined flush batch from multiple immutable memtables in cached mode.
// Values <= 1 disable parallelism.
FlushBuildConcurrency int
// FlushBuildMinEntries gates the parallel build path by total entries.
// Values <= 0 use a default of 16k.
FlushBuildMinEntries int
// FlushBuildMinUnits gates the parallel build path by number of queued units.
// Values <= 0 use a default of 2.
FlushBuildMinUnits int
// FlushBuildChunkCap controls the maximum entries per build chunk.
// A value of 0 enables adaptive chunk sizing, values < 0 use the fixed default of 8192,
// and values > 0 set an explicit cap.
FlushBuildChunkCap int
// FlushBuildChunkTargetBytes controls adaptive chunk sizing (bytes per chunk).
// Values <= 0 use a default of 2MiB.
FlushBuildChunkTargetBytes int
// FlushBuildChunkMinBytes clamps adaptive chunk sizes (minimum bytes).
// Values <= 0 use a default of 1MiB.
FlushBuildChunkMinBytes int
// FlushBuildChunkMaxBytes clamps adaptive chunk sizes (maximum bytes).
// Values <= 0 use a default of 4MiB.
FlushBuildChunkMaxBytes int
// FlushBuildPrefetchUnits controls how many memtables to start building ahead
// of the consumer. Values <= 0 use FlushBuildConcurrency.
FlushBuildPrefetchUnits int
// FlushBackendMaxEntries caps how many operations are buffered into a single
// backend batch before committing it and continuing with a fresh batch.
//
// This increases backend commit cadence during very large flushes, which can
// reduce index.db high-watermark growth under small KeepRecent windows by
// making retired pages eligible for reuse sooner.
//
// 0 uses the internal default. Negative disables chunking (single backend
// commit per flush).
FlushBackendMaxEntries int
// FlushBackendMaxBatches caps how many intermediate backend commits a single
// flush may emit (0=default, <0=disable cap).
FlushBackendMaxBatches int
// JournalLanes controls the number of active commit/value log lanes (0=default).
// The maximum number of supported lanes is 255; the value-log segment sequence per lane is capped at 8,388,607.
JournalLanes int
// WALMaxSegmentBytes caps the size of a single WAL segment payload.
// 0 uses the default limit.
WALMaxSegmentBytes int64
// JournalCompression enables best-effort zstd compression for cached-mode
// journal/commitlog segments (metadata only).
//
// The redo log will only keep compressed bytes when they are smaller than the
// raw payload, so compression never causes size amplification.
JournalCompression bool
// ValueLog configures value-log pointer behavior and read integrity.
ValueLog ValueLogOptions
// NotifyError is an optional hook for background maintenance failures.
NotifyError func(error)
// VerifyOnRead forces checksum verification on every index page read,
// bypassing the verified-page cache.
VerifyOnRead bool
// DisableSideStores skips opening dictdb/templatedb side stores.
// This is intended for internal side-store usage (e.g. templatedb itself).
DisableSideStores bool
// DisablePiggybackCompaction disables opportunistic defragmentation during writes.
// When false (default), nodes are rewritten if their siblings are physically
// distant, keeping the tree clustered. Set to true to maximize write speed.
DisablePiggybackCompaction bool
// BackgroundCheckpointInterval enables periodic durable checkpoints in cached
// mode. A checkpoint creates a backend sync boundary and trims
// cached-mode WAL segments to keep `wal/` growth bounded.
//
// Semantics:
// - `0` uses a default.
// - `<0` disables the periodic interval trigger.
BackgroundCheckpointInterval time.Duration
// BackgroundCheckpointIdleDuration triggers an opportunistic checkpoint after
// a period of write-idleness in cached mode.
//
// Semantics:
// - `0` uses a default.
// - `<0` disables the idle trigger.
BackgroundCheckpointIdleDuration time.Duration
// BackgroundIndexVacuumInterval enables periodic online index vacuum passes.
// `0` uses a default; `<0` disables.
BackgroundIndexVacuumInterval time.Duration
// BackgroundIndexVacuumSpanRatioPPM sets the span ratio threshold that
// triggers a vacuum pass (0 uses a default).
BackgroundIndexVacuumSpanRatioPPM uint32
// MaxWALBytes triggers an immediate checkpoint in cached mode when the sum of
// WAL segment sizes exceeds this many bytes (0 uses a default; <0 disables the
// size trigger). This is an operational safety cap; it does not make each
// individual write durable (use *Sync APIs for that).
MaxWALBytes int64
}
type Snapshot ¶
type Snapshot struct {
// contains filtered or unexported fields
}
type SnapshotPool ¶
type SnapshotPool struct {
// contains filtered or unexported fields
}
SnapshotPool manages a pool of Snapshot objects to reduce allocation overhead.
func NewSnapshotPool ¶
func NewSnapshotPool() *SnapshotPool
func (*SnapshotPool) Get ¶
func (p *SnapshotPool) Get() *Snapshot
func (*SnapshotPool) Put ¶
func (p *SnapshotPool) Put(s *Snapshot)
type ValueLogAutoPolicy ¶ added in v0.3.0
type ValueLogAutoPolicy uint8
ValueLogAutoPolicy controls auto-mode dict vs block selection bias.
const (
	ValueLogAutoBalanced ValueLogAutoPolicy = iota
	ValueLogAutoThroughput
	ValueLogAutoSize
)
type ValueLogBlockCodec ¶ added in v0.3.0
type ValueLogBlockCodec uint8
ValueLogBlockCodec selects the block codec used for block compression modes.
const (
	ValueLogBlockSnappy ValueLogBlockCodec = iota
	ValueLogBlockLZ4
)
type ValueLogCompressionMode ¶ added in v0.3.0
type ValueLogCompressionMode uint8
ValueLogCompressionMode selects value-log compression behavior in cached mode.
const (
	// ValueLogCompressionOff stores value-log grouped frames uncompressed.
	//
	// Zero is intentionally reserved as "unset/default".
	// db.Open normalizes zero to ValueLogCompressionAuto.
	ValueLogCompressionOff ValueLogCompressionMode = iota + 1
	// ValueLogCompressionBlock uses block compression without dictionaries.
	ValueLogCompressionBlock
	// ValueLogCompressionDict uses dictionary compression when available.
	ValueLogCompressionDict
	// ValueLogCompressionAuto adaptively chooses off/block/dict.
	ValueLogCompressionAuto
)
type ValueLogGCOptions ¶ added in v0.3.0
type ValueLogGCOptions struct {
DryRun bool
}
ValueLogGCOptions controls value-log garbage collection.
type ValueLogGCStats ¶ added in v0.3.0
type ValueLogGCStats struct {
SegmentsTotal int
SegmentsReferenced int
SegmentsActive int
SegmentsEligible int
SegmentsDeleted int
BytesTotal int64
BytesReferenced int64
BytesActive int64
BytesEligible int64
BytesDeleted int64
}
ValueLogGCStats summarizes value-log GC work.
type ValueLogOptions ¶ added in v0.3.0
type ValueLogOptions struct {
// Compression selects value-log compression behavior.
Compression ValueLogCompressionMode
// BlockCodec selects the block codec for block compression.
BlockCodec ValueLogBlockCodec
// BlockTargetCompressedBytes guides grouped block size adaptation.
//
// 0 uses a default.
BlockTargetCompressedBytes int
// IncompressibleHoldBytes configures auto-mode suppression duration after
// repeated incompressible probes.
//
// 0 uses a default.
IncompressibleHoldBytes int
// IncompressibleProbeIntervalBytes controls probe cadence while
// incompressible hold is active.
//
// 0 uses a default.
IncompressibleProbeIntervalBytes int
// AutoPolicy controls auto-mode bias (throughput, balanced, size).
AutoPolicy ValueLogAutoPolicy
// PointerThreshold controls when value-log pointers are used.
// Values <= 0 use a default threshold. In cached mode, relaxed durability
// settings may choose a smaller default to avoid large-scale update cliffs by
// pushing moderate values into the value log.
PointerThreshold int
// ForcePointers stores all values out-of-line in the value log (no inline values).
ForcePointers bool
// RawWritevMinAvgBytes controls raw grouped-frame writev usage.
//
// 0 enables adaptive mode (no average-bytes floor).
RawWritevMinAvgBytes int
// RawWritevMinBatchRecords controls minimum grouped records before raw writev
// is considered.
//
// <=0 uses the default.
RawWritevMinBatchRecords int
// ReadIntegrity configures checksum verification on value-log reads.
ReadIntegrity IntegrityMode
// MaxRetainedBytes emits a warning when retained value-log bytes exceed this
// threshold (0 disables warnings). Cached mode only.
MaxRetainedBytes int64
// MaxRetainedBytesHard disables value-log pointers for new large values once
// retained bytes exceed this threshold (0 disables the cap).
MaxRetainedBytesHard int64
// DictLookup provides dictionary bytes for value-log decoding.
DictLookup valuelog.DictLookup
// DictTrain configures background dictionary training for value-log frame
// compression in cached mode.
DictTrain compression.TrainConfig
// DictAdaptiveRatio enables best-effort adaptive disable/pause of value-log
// dictionary compression when payload compression ratios degrade (0 disables).
DictAdaptiveRatio float64
// DictMetricsWindowBytes controls the rolling window size for ratio tracking (0=default).
DictMetricsWindowBytes int
// DictMetricsMinRecords controls how many records must be observed in a window
// before adaptive pause triggers (0=default).
DictMetricsMinRecords int
// DictMetricsPauseBytes controls how long to pause dict compression after a degraded
// window is detected (0=default).
DictMetricsPauseBytes int
// DictIncompressibleHoldBytes enables classifier-driven hold mode for
// high-entropy streams. While hold mode is active, dict attempts and trainer
// collection are bypassed until hold bytes are consumed.
//
// 0 uses profile/default hold configuration; <0 explicitly disables hold
// mode and opts out of profile defaults.
DictIncompressibleHoldBytes int
// DictProbeIntervalBytes controls periodic probe attempts while
// incompressible hold mode is active.
//
// <=0 uses a default derived from hold bytes.
DictProbeIntervalBytes int
// DictMinPayloadSavingsRatio rejects newly trained dictionaries whose payload
// ratio does not improve by at least this fraction (0 uses a cached-mode
// throughput-oriented default: 0.02 normally, 0.05 with ForcePointers or
// WAL disabled).
DictMinPayloadSavingsRatio float64
// DictMaxK clamps the maximum group size (K) used for value-log dict-compressed
// frames.
//
// Larger K can improve compression ratio (more cross-record matches) and can
// reduce framing overhead, but may increase CPU and tail latency due to larger
// encode/decode units.
//
// Values <= 0 use the default (32). Values above the engine maximum are clamped.
DictMaxK int
// DictFrameEncodeLevel controls the zstd encoder level used for dict-compressed
// value-log frames.
//
// Values <= 0 use the default (SpeedFastest).
DictFrameEncodeLevel zstd.EncoderLevel
// DictFrameEnableEntropy enables entropy coding for dict-compressed value-log
// frames (higher ratio, lower throughput).
//
// Default is false (throughput-focused: no-entropy compression).
DictFrameEnableEntropy bool
// CompressionAutotune configures the wall-time value-log compression autotuner.
CompressionAutotune valuelog.AutotuneOptions
// TemplateMode controls template-based compression for value-log values.
TemplateMode template.Mode
// TemplateConfig controls template creation and encoding behavior.
TemplateConfig template.Config
// TemplateReadStrict controls strict template decode behavior.
TemplateReadStrict bool
// TemplateLookup provides template definition bytes for value-log decoding.
TemplateLookup valuelog.TemplateLookup
// TemplateDecodeOptions controls decode caps for template payloads.
TemplateDecodeOptions template.DecodeOptions
}
ValueLogOptions configures value-log pointer behavior and optional compression/dict tuning.
type ValueLogRewriteStats ¶ added in v0.3.0
type ValueLogRewriteStats struct {
SegmentsBefore int
SegmentsAfter int
BytesBefore int64
BytesAfter int64
RecordsCopied int
}
ValueLogRewriteStats summarizes rewrite compaction results.
func ValueLogRewriteOffline ¶ added in v0.3.0
func ValueLogRewriteOffline(opts Options) (ValueLogRewriteStats, error)
ValueLogRewriteOffline rewrites value-log pointers into new segments and swaps index.db to reference the new log. This is an offline operation (requires exclusive lock and a clean commitlog).
type WritePolicy ¶
type WritePolicy struct {
FlushThreshold int64 // Size of memtable before flush
InlineThreshold int // Max size of value to store inline
}
WritePolicy defines the heuristics and thresholds for write operations.
func DefaultWritePolicy ¶
func DefaultWritePolicy() WritePolicy
DefaultWritePolicy returns the default policy.