Documentation ¶
Index ¶
- Constants
- func Copy(ctx context.Context, r ReadHandle, out Writable, offset, length uint64) error
- func IsExternalTable(provider Provider, fileNum base.DiskFileNum) bool
- func IsLocalBlobFile(provider Provider, fileNum base.DiskFileNum) bool
- func IsLocalTable(provider Provider, fileNum base.DiskFileNum) bool
- type CreateOptions
- type CreatorID
- type MemObj
- func (f *MemObj) Abort()
- func (f *MemObj) Close() error
- func (f *MemObj) Data() []byte
- func (f *MemObj) Finish() error
- func (f *MemObj) NewReadHandle(readBeforeSize ReadBeforeSize) ReadHandle
- func (f *MemObj) ReadAt(ctx context.Context, p []byte, off int64) error
- func (f *MemObj) Size() int64
- func (f *MemObj) Write(p []byte) error
- type NoopReadHandle
- type ObjectMetadata
- type OpenOptions
- type Provider
- type ReadBeforeSize
- type ReadHandle
- type Readable
- type RemoteObjectBacking
- type RemoteObjectBackingHandle
- type RemoteObjectToAttach
- type SharedCleanupMethod
- type Writable
Constants ¶
const (
    // NoReadBefore specifies no read-before.
    NoReadBefore ReadBeforeSize = 0

    // ReadBeforeForNewReader is used for a new Reader reading the footer,
    // metaindex, properties. 32KB is unnecessarily large, but it is still small
    // when considering remote object storage.
    ReadBeforeForNewReader = 32 * 1024

    // ReadBeforeForIndexAndFilter is used for an iterator reading the top-level
    // index, filter and second-level index blocks.
    //
    // Consider a 128MB sstable with 32KB blocks, so 4K blocks. Say keys are
    // ~100 bytes, then the size of the index blocks is ~400KB. 512KB is a bit
    // bigger, and not too large to be a memory concern.
    ReadBeforeForIndexAndFilter = 512 * 1024
)
Variables ¶
This section is empty.
Functions ¶
func IsExternalTable ¶
func IsExternalTable(provider Provider, fileNum base.DiskFileNum) bool
IsExternalTable returns true if a table with the given fileNum exists and is external.
func IsLocalBlobFile ¶ added in v2.1.0
func IsLocalBlobFile(provider Provider, fileNum base.DiskFileNum) bool
IsLocalBlobFile returns true if a blob file with the given fileNum exists and is local.
func IsLocalTable ¶
func IsLocalTable(provider Provider, fileNum base.DiskFileNum) bool
IsLocalTable returns true if a table with the given fileNum exists and is local.
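As a sketch of how these helpers might be used together (the provider and fileNum are assumed to come from the caller; reportTableLocation is a hypothetical function, and fmt, the objstorage package, and Pebble's base package are assumed to be imported):

// reportTableLocation logs where a table lives, using the helpers above.
// Illustrative sketch only; provider and fileNum are supplied by the caller.
func reportTableLocation(provider objstorage.Provider, fileNum base.DiskFileNum) {
    switch {
    case objstorage.IsLocalTable(provider, fileNum):
        fmt.Printf("table %v is stored locally\n", fileNum)
    case objstorage.IsExternalTable(provider, fileNum):
        fmt.Printf("table %v is external (not owned by any Pebble instance)\n", fileNum)
    default:
        fmt.Printf("table %v is remote (shared) or unknown\n", fileNum)
    }
}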
Types ¶
type CreateOptions ¶
type CreateOptions struct {
// PreferSharedStorage causes the object to be created on shared storage if
// the provider has shared storage configured.
PreferSharedStorage bool
// SharedCleanupMethod sets the cleanup method used if the object is created
// on shared storage. The default (zero) value is SharedRefTracking.
SharedCleanupMethod SharedCleanupMethod
// WriteCategory is used for the object when it is created on local storage
// to collect aggregated write metrics for each write source.
WriteCategory vfs.DiskWriteCategory
}
CreateOptions contains optional arguments for Create.
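For illustration, a caller creating a new sstable object might fill in CreateOptions along these lines (a sketch only; the provider, context, and file number are assumed inputs, the write category name is hypothetical, and base.FileTypeTable is used purely for illustration):

// createTableObject sketches how CreateOptions might be passed to Provider.Create.
func createTableObject(
    ctx context.Context, provider objstorage.Provider, fileNum base.DiskFileNum,
) (objstorage.Writable, objstorage.ObjectMetadata, error) {
    opts := objstorage.CreateOptions{
        // Place the object on shared storage if the provider has it configured.
        PreferSharedStorage: true,
        // Zero value; spelled out here for clarity.
        SharedCleanupMethod: objstorage.SharedRefTracking,
        // Attribute local disk writes to a named category for metrics.
        WriteCategory: "example-flush", // hypothetical category name
    }
    return provider.Create(ctx, base.FileTypeTable, fileNum, opts)
}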
type CreatorID ¶
type CreatorID uint64
CreatorID identifies the DB instance that originally created a shared object. This ID is incorporated in backing object names. Must be non-zero.
func (CreatorID) SafeFormat ¶
func (c CreatorID) SafeFormat(w redact.SafePrinter, _ rune)
SafeFormat implements redact.SafeFormatter.
type MemObj ¶
type MemObj struct {
// contains filtered or unexported fields
}
MemObj is an implementation of the Writable and Readable interfaces that holds all data in memory.
A zero MemObj can be populated with data through its Writable methods, and then can be repeatedly used as a Readable.
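A minimal round trip through a zero MemObj might look like the following sketch (memObjRoundTrip is a hypothetical function; context, fmt, and the objstorage package are assumed to be imported):

// memObjRoundTrip writes a payload into a zero MemObj via its Writable side
// and reads it back via its Readable side. Illustrative sketch only.
func memObjRoundTrip(ctx context.Context) error {
    var obj objstorage.MemObj
    if err := obj.Write([]byte("hello")); err != nil {
        return err
    }
    if err := obj.Finish(); err != nil {
        return err
    }
    buf := make([]byte, obj.Size())
    if err := obj.ReadAt(ctx, buf, 0); err != nil {
        return err
    }
    fmt.Printf("read %q\n", buf)
    return nil
}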
func (*MemObj) NewReadHandle ¶
func (f *MemObj) NewReadHandle(readBeforeSize ReadBeforeSize) ReadHandle
NewReadHandle is part of the Readable interface.
type NoopReadHandle ¶
type NoopReadHandle struct {
// contains filtered or unexported fields
}
NoopReadHandle can be used by Readable implementations that don't support read-ahead.
func MakeNoopReadHandle ¶
func MakeNoopReadHandle(r Readable) NoopReadHandle
MakeNoopReadHandle initializes a NoopReadHandle.
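A Readable implementation with no read-ahead support can satisfy NewReadHandle by handing out a NoopReadHandle, roughly as in the sketch below (simpleReadable is a hypothetical type; context, io, and the objstorage package are assumed to be imported):

// simpleReadable is a hypothetical in-memory Readable without read-ahead.
type simpleReadable struct {
    data []byte
}

// ReadAt is part of the Readable interface.
func (r *simpleReadable) ReadAt(_ context.Context, p []byte, off int64) error {
    if off < 0 || off+int64(len(p)) > int64(len(r.data)) {
        return io.ErrUnexpectedEOF // no partial results
    }
    copy(p, r.data[off:])
    return nil
}

// Close is part of the Readable interface.
func (r *simpleReadable) Close() error { return nil }

// Size is part of the Readable interface.
func (r *simpleReadable) Size() int64 { return int64(len(r.data)) }

// NewReadHandle is part of the Readable interface; it wraps the Readable in a
// NoopReadHandle since no read-ahead is available.
func (r *simpleReadable) NewReadHandle(_ objstorage.ReadBeforeSize) objstorage.ReadHandle {
    h := objstorage.MakeNoopReadHandle(r)
    return &h
}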
func (*NoopReadHandle) Close ¶
func (*NoopReadHandle) Close() error
Close is part of the ReadHandle interface.
func (*NoopReadHandle) RecordCacheHit ¶
func (*NoopReadHandle) RecordCacheHit(_ context.Context, offset, size int64)
RecordCacheHit is part of the ReadHandle interface.
func (*NoopReadHandle) SetupForCompaction ¶
func (*NoopReadHandle) SetupForCompaction()
SetupForCompaction is part of the ReadHandle interface.
type ObjectMetadata ¶
type ObjectMetadata struct {
DiskFileNum base.DiskFileNum
FileType base.FileType
// The fields below are only set if the object is on remote storage.
Remote struct {
// CreatorID identifies the DB instance that originally created the object.
//
// Only used when CustomObjectName is not set.
CreatorID CreatorID
// CreatorFileNum is the identifier for the object within the context of the
// DB instance that originally created the object.
//
// Only used when CustomObjectName is not set.
CreatorFileNum base.DiskFileNum
// CustomObjectName (if it is set) overrides the object name that is normally
// derived from the CreatorID and CreatorFileNum.
CustomObjectName string
// CleanupMethod indicates the method for cleaning up unused shared objects.
CleanupMethod SharedCleanupMethod
// Locator identifies the remote.Storage implementation for this object.
Locator remote.Locator
// Storage is the remote.Storage object corresponding to the Locator. Used
// to avoid lookups in hot paths.
Storage remote.Storage
}
}
ObjectMetadata contains the metadata required to be able to access an object.
func (*ObjectMetadata) AssertValid ¶
func (meta *ObjectMetadata) AssertValid()
AssertValid checks that the metadata is sane.
func (*ObjectMetadata) IsExternal ¶
func (meta *ObjectMetadata) IsExternal() bool
IsExternal returns true if the object is on remote storage but is not owned by any Pebble instances in the cluster.
func (*ObjectMetadata) IsRemote ¶
func (meta *ObjectMetadata) IsRemote() bool
IsRemote returns true if the object is on remote storage.
func (*ObjectMetadata) IsShared ¶
func (meta *ObjectMetadata) IsShared() bool
IsShared returns true if the object is on remote storage and is owned by a Pebble instance in the cluster (potentially shared between multiple instances).
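The three predicates partition the provider's objects by ownership; a sketch that tallies them (countByLocation is a hypothetical function):

// countByLocation tallies the objects known to a provider as local, shared,
// or external using the ObjectMetadata predicates. Illustrative sketch only.
func countByLocation(provider objstorage.Provider) (local, shared, external int) {
    for _, meta := range provider.List() {
        switch {
        case meta.IsExternal():
            external++
        case meta.IsShared():
            shared++
        case !meta.IsRemote():
            local++
        }
    }
    return local, shared, external
}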
type OpenOptions ¶
type OpenOptions struct {
// MustExist converts a not-exist error into a corruption error, and adds
// extra information helpful for debugging.
MustExist bool
}
OpenOptions contains optional arguments for OpenForReading.
type Provider ¶
type Provider interface {
// OpenForReading opens an existing object.
OpenForReading(
ctx context.Context, fileType base.FileType, FileNum base.DiskFileNum, opts OpenOptions,
) (Readable, error)
// Create creates a new object and opens it for writing.
//
// The object is not guaranteed to be durable (accessible in case of crashes)
// until Sync is called.
Create(
ctx context.Context, fileType base.FileType, FileNum base.DiskFileNum, opts CreateOptions,
) (w Writable, meta ObjectMetadata, err error)
// Remove removes an object.
//
// The object is not guaranteed to be durably removed until Sync is called.
Remove(fileType base.FileType, FileNum base.DiskFileNum) error
// Sync flushes the metadata from creation or removal of objects since the last Sync.
// This includes objects that have been Created but for which
// Writable.Finish() has not yet been called.
Sync() error
// LinkOrCopyFromLocal creates a new object that is either a copy of a given
// local file or a hard link (if the new object is created on the same FS, and
// if the FS supports it).
//
// The object is not guaranteed to be durable (accessible in case of crashes)
// until Sync is called.
LinkOrCopyFromLocal(
ctx context.Context,
srcFS vfs.FS,
srcFilePath string,
dstFileType base.FileType,
dstFileNum base.DiskFileNum,
opts CreateOptions,
) (ObjectMetadata, error)
// Lookup returns the metadata of an object that is already known to the Provider.
// Does not perform any I/O.
Lookup(fileType base.FileType, FileNum base.DiskFileNum) (ObjectMetadata, error)
// Path returns an internal, implementation-dependent path for the object. It is
// meant to be used for informational purposes (like logging).
Path(meta ObjectMetadata) string
// Size returns the size of the object.
Size(meta ObjectMetadata) (int64, error)
// List returns the objects currently known to the provider. Does not perform any I/O.
List() []ObjectMetadata
// SetCreatorID sets the CreatorID which is needed in order to use shared
// objects. Remote object usage is disabled until this method is called the
// first time. Once set, the Creator ID is persisted and cannot change.
//
// Cannot be called if shared storage is not configured for the provider.
SetCreatorID(creatorID CreatorID) error
IsSharedForeign(meta ObjectMetadata) bool
// RemoteObjectBacking encodes the remote object metadata for the given object.
RemoteObjectBacking(meta *ObjectMetadata) (RemoteObjectBackingHandle, error)
// CreateExternalObjectBacking creates a backing for an existing object with a
// custom object name. The object is considered to be managed outside of
// Pebble and will never be removed by Pebble.
CreateExternalObjectBacking(locator remote.Locator, objName string) (RemoteObjectBacking, error)
// GetExternalObjects returns a list of DiskFileNums corresponding to all
// objects that are backed by the given external object.
GetExternalObjects(locator remote.Locator, objName string) []base.DiskFileNum
// AttachRemoteObjects registers existing remote objects with this provider.
//
// The objects are not guaranteed to be durable (accessible in case of
// crashes) until Sync is called.
AttachRemoteObjects(objs []RemoteObjectToAttach) ([]ObjectMetadata, error)
Close() error
// IsNotExistError indicates whether the error is known to report that a file or
// directory does not exist.
IsNotExistError(err error) bool
// CheckpointState saves any saved state on local disk to the specified
// directory on the specified VFS. A new Pebble instance instantiated at that
// path should be able to resolve references to the specified files.
CheckpointState(fs vfs.FS, dir string, fileNums []base.DiskFileNum) error
// Metrics returns metrics about objstorage. Currently, it only returns metrics
// about the shared cache.
Metrics() sharedcache.Metrics
}
Provider is a singleton object used to access and manage objects.
An object is conceptually like a large immutable file. The main use of objects is for storing sstables; in the future it could also be used for blob storage.
The Provider can only manage objects that it knows about - either objects created by the provider, or existing objects the Provider was informed about via AttachRemoteObjects.
Objects are currently backed by a vfs.File or a remote.Storage object.
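To illustrate the typical create/write/sync flow against this interface, here is a sketch (writeObject is a hypothetical function; the provider, file number, and payload are assumed inputs, and base.FileTypeTable is used purely for illustration):

// writeObject sketches the Create -> Write -> Finish -> Sync sequence.
func writeObject(
    ctx context.Context,
    provider objstorage.Provider,
    fileNum base.DiskFileNum,
    payload []byte,
) (objstorage.ObjectMetadata, error) {
    w, meta, err := provider.Create(ctx, base.FileTypeTable, fileNum, objstorage.CreateOptions{})
    if err != nil {
        return objstorage.ObjectMetadata{}, err
    }
    if err := w.Write(payload); err != nil {
        w.Abort() // give up; the object may or may not exist afterwards
        return objstorage.ObjectMetadata{}, err
    }
    if err := w.Finish(); err != nil {
        return objstorage.ObjectMetadata{}, err
    }
    // The new object is not guaranteed to be durable until Sync is called.
    if err := provider.Sync(); err != nil {
        return objstorage.ObjectMetadata{}, err
    }
    return meta, nil
}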
type ReadBeforeSize ¶
type ReadBeforeSize int64
ReadBeforeSize specifies whether the first read should read additional bytes before the offset, and how big the overall read should be. This is just a suggestion that the callee can ignore (and does ignore in fileReadable).
When 0, the first read will only read what it is asked to read, say n bytes. When it is a value b > 0, if b > n, then the read will be padded by an additional b-n bytes to the left, resulting in an overall read size of b. This behavior is akin to what the read-ahead implementation does -- when the n bytes are not buffered, and there is read-ahead of b > n, the read length is b bytes.
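Concretely, for a 4KB request at offset 100KB with ReadBeforeForNewReader (32KB) as the hint, an implementation that honors the hint widens the read to 32KB ending at the same point, i.e. it reads offsets [72KB, 104KB) instead of [100KB, 104KB). The sketch below computes that widened span (widenForReadBefore is a hypothetical helper, not part of the package):

// widenForReadBefore computes the span that an implementation honoring a
// ReadBeforeSize hint might read for a request of n bytes at offset off.
func widenForReadBefore(off, n int64, b objstorage.ReadBeforeSize) (start, length int64) {
    if int64(b) <= n {
        return off, n // the hint is no larger than the request; read as asked
    }
    start = off - (int64(b) - n) // pad b-n bytes to the left
    if start < 0 {
        start = 0
    }
    return start, off + n - start
}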
type ReadHandle ¶
type ReadHandle interface {
// ReadAt reads len(p) bytes into p starting at offset off.
//
// Does not return partial results; if off + len(p) is past the end of the
// object, an error is returned.
//
// Parallel ReadAt calls on the same ReadHandle are not allowed.
ReadAt(ctx context.Context, p []byte, off int64) error
Close() error
// SetupForCompaction informs the implementation that the read handle will
// be used to read data blocks for a compaction. The implementation can expect
// sequential reads, and can decide to not retain data in any caches.
SetupForCompaction()
// RecordCacheHit informs the implementation that we were able to retrieve a
// block from cache. This is useful for example when the implementation is
// trying to detect a sequential reading pattern.
RecordCacheHit(ctx context.Context, offset, size int64)
}
ReadHandle is used to perform reads that are related and might benefit from optimizations like read-ahead.
type Readable ¶
type Readable interface {
// ReadAt reads len(p) bytes into p starting at offset off.
//
// Does not return partial results; if off + len(p) is past the end of the
// object, an error is returned.
//
// Clients of ReadAt can execute parallel ReadAt calls on the
// same Readable.
ReadAt(ctx context.Context, p []byte, off int64) error
Close() error
// Size returns the size of the object.
Size() int64
// NewReadHandle creates a read handle for ReadAt requests that are related
// and can benefit from optimizations like read-ahead.
//
// The ReadHandle must be closed before the Readable is closed.
//
// Multiple separate ReadHandles can be used.
NewReadHandle(readBeforeSize ReadBeforeSize) ReadHandle
}
Readable is the handle for an object that is open for reading.
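A sketch of reading the tail of an object through a ReadHandle created with the ReadBeforeForNewReader hint (readTail is a hypothetical function; the provider and file number are assumed inputs, and base.FileTypeTable is used for illustration):

// readTail opens an object for reading and fetches its last n bytes through a
// ReadHandle. Illustrative sketch only.
func readTail(
    ctx context.Context, provider objstorage.Provider, fileNum base.DiskFileNum, n int64,
) ([]byte, error) {
    r, err := provider.OpenForReading(ctx, base.FileTypeTable, fileNum, objstorage.OpenOptions{})
    if err != nil {
        return nil, err
    }
    defer r.Close()

    h := r.NewReadHandle(objstorage.ReadBeforeForNewReader)
    // The ReadHandle must be closed before the Readable is closed; deferred
    // calls run in reverse order, so this ordering is correct.
    defer h.Close()

    if n > r.Size() {
        n = r.Size()
    }
    buf := make([]byte, n)
    if err := h.ReadAt(ctx, buf, r.Size()-n); err != nil {
        return nil, err
    }
    return buf, nil
}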
type RemoteObjectBacking ¶
type RemoteObjectBacking []byte
RemoteObjectBacking encodes the metadata necessary to incorporate a shared object into a different Pebble instance. The encoding is specific to a given Provider implementation.
type RemoteObjectBackingHandle ¶
type RemoteObjectBackingHandle interface {
// Get returns the backing. The backing is only guaranteed to be valid until
// Close is called (or until the Provider is closed). If Close was already
// called, returns an error.
Get() (RemoteObjectBacking, error)
Close()
}
RemoteObjectBackingHandle is a container for a RemoteObjectBacking which ensures that the backing stays valid. A backing can otherwise become invalid if this provider unrefs the shared object. The RemoteObjectBackingHandle delays any unref until Close.
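A sketch of exporting a backing so that another instance can attach the object later; the explicit copy matters because the backing is only guaranteed valid until Close (shareBacking is a hypothetical function):

// shareBacking obtains the remote object backing for meta and copies it out
// before releasing the handle. Illustrative sketch only.
func shareBacking(
    provider objstorage.Provider, meta *objstorage.ObjectMetadata,
) (objstorage.RemoteObjectBacking, error) {
    handle, err := provider.RemoteObjectBacking(meta)
    if err != nil {
        return nil, err
    }
    defer handle.Close()

    backing, err := handle.Get()
    if err != nil {
        return nil, err
    }
    // Copy the encoded backing so it remains usable after Close.
    return append(objstorage.RemoteObjectBacking(nil), backing...), nil
}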
type RemoteObjectToAttach ¶
type RemoteObjectToAttach struct {
// FileNum is the file number that will be used to refer to this object (in
// the context of this instance).
FileNum base.DiskFileNum
FileType base.FileType
// Backing contains the metadata for the remote object backing (normally
// generated from a different instance, but using the same Provider
// implementation).
Backing RemoteObjectBacking
}
RemoteObjectToAttach contains the arguments needed to attach an existing remote object.
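Attaching a backing produced elsewhere might look like the following sketch (attachSharedTable is a hypothetical function; base.FileTypeTable is used for illustration and the backing is assumed to have been obtained from another instance):

// attachSharedTable registers a remote object, described by backing, under
// fileNum with this provider. Illustrative sketch only.
func attachSharedTable(
    provider objstorage.Provider,
    fileNum base.DiskFileNum,
    backing objstorage.RemoteObjectBacking,
) ([]objstorage.ObjectMetadata, error) {
    metas, err := provider.AttachRemoteObjects([]objstorage.RemoteObjectToAttach{{
        FileNum:  fileNum,
        FileType: base.FileTypeTable,
        Backing:  backing,
    }})
    if err != nil {
        return nil, err
    }
    // The attachment is not durable until Sync is called.
    if err := provider.Sync(); err != nil {
        return nil, err
    }
    return metas, nil
}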
type SharedCleanupMethod ¶
type SharedCleanupMethod uint8
SharedCleanupMethod indicates the method for cleaning up unused shared objects.
const (
    // SharedRefTracking is the default cleanup method: the provider keeps
    // track of references via reference marker objects.
    SharedRefTracking SharedCleanupMethod = iota

    // SharedNoCleanup is used for shared objects that are managed externally;
    // the objstorage provider never deletes such objects.
    SharedNoCleanup
)
type Writable ¶
type Writable interface {
// Write writes len(p) bytes from p to the underlying object. The data is not
// guaranteed to be durable until Finish is called.
//
// Note that Write *is* allowed to modify the slice passed in, whether
// temporarily or permanently. Callers of Write need to take this into
// account.
Write(p []byte) error
// Finish completes the object and makes the data durable.
// No further calls are allowed after calling Finish.
Finish() error
// Abort gives up on finishing the object. There is no guarantee about whether
// the object exists after calling Abort.
// No further calls are allowed after calling Abort.
Abort()
}
Writable is the handle for an object that is open for writing. Either Finish or Abort must be called.
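The contract that either Finish or Abort must be called leads to a pattern like the sketch below (copyToWritable is a hypothetical helper; io and the objstorage package are assumed to be imported):

// copyToWritable streams src into w, aborting on any error and finishing
// otherwise. Illustrative sketch only.
func copyToWritable(w objstorage.Writable, src io.Reader) error {
    buf := make([]byte, 64<<10)
    for {
        n, err := src.Read(buf)
        if n > 0 {
            // Write may modify the slice it is given; that is fine here
            // because buf is refilled on the next iteration.
            if werr := w.Write(buf[:n]); werr != nil {
                w.Abort()
                return werr
            }
        }
        if err == io.EOF {
            break
        }
        if err != nil {
            w.Abort()
            return err
        }
    }
    return w.Finish()
}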