rowcontainer

package

v0.0.1 Latest Latest Go to latest Published: Mar 18, 2022 License: Apache-2.0 Imports: 24 Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/labulakalia/sqlfmt

Links

Open Source Insights

Documentation ¶

Index ¶

type AllRowsIterator
- func (i *AllRowsIterator) Close()
type DeDupingRowContainer
type DiskBackedIndexedRowContainer
- func NewDiskBackedIndexedRowContainer(ordering colinfo.ColumnOrdering, typs []*types.T, evalCtx *tree.EvalContext, ...) *DiskBackedIndexedRowContainer
- func (f *DiskBackedIndexedRowContainer) AddRow(ctx context.Context, row rowenc.EncDatumRow) error
- func (f *DiskBackedIndexedRowContainer) Close(ctx context.Context)
- func (f *DiskBackedIndexedRowContainer) GetRow(ctx context.Context, pos int) (tree.IndexedRow, error)
- func (f *DiskBackedIndexedRowContainer) Reorder(ctx context.Context, ordering colinfo.ColumnOrdering) error
- func (f *DiskBackedIndexedRowContainer) UnsafeReset(ctx context.Context) error
type DiskBackedNumberedRowContainer
- func NewDiskBackedNumberedRowContainer(deDup bool, types []*types.T, evalCtx *tree.EvalContext, ...) *DiskBackedNumberedRowContainer
- func (d *DiskBackedNumberedRowContainer) AddRow(ctx context.Context, row rowenc.EncDatumRow) (int, error)
- func (d *DiskBackedNumberedRowContainer) Close(ctx context.Context)
- func (d *DiskBackedNumberedRowContainer) GetRow(ctx context.Context, idx int, skip bool) (rowenc.EncDatumRow, error)
- func (d *DiskBackedNumberedRowContainer) SetupForRead(ctx context.Context, accesses [][]int)
- func (d *DiskBackedNumberedRowContainer) Spilled() bool
- func (d *DiskBackedNumberedRowContainer) UnsafeReset(ctx context.Context) error
- func (d *DiskBackedNumberedRowContainer) UsingDisk() bool
type DiskBackedRowContainer
- func (f *DiskBackedRowContainer) AddRow(ctx context.Context, row rowenc.EncDatumRow) error
- func (f *DiskBackedRowContainer) AddRowWithDeDup(ctx context.Context, row rowenc.EncDatumRow) (int, error)
- func (f *DiskBackedRowContainer) Close(ctx context.Context)
- func (f *DiskBackedRowContainer) DoDeDuplicate()
- func (f *DiskBackedRowContainer) Init(ordering colinfo.ColumnOrdering, types []*types.T, evalCtx *tree.EvalContext, ...)
- func (f *DiskBackedRowContainer) InitTopK()
- func (f *DiskBackedRowContainer) Len() int
- func (f *DiskBackedRowContainer) MaybeReplaceMax(ctx context.Context, row rowenc.EncDatumRow) error
- func (f *DiskBackedRowContainer) NewFinalIterator(ctx context.Context) RowIterator
- func (f *DiskBackedRowContainer) NewIterator(ctx context.Context) RowIterator
- func (f *DiskBackedRowContainer) Reorder(ctx context.Context, ordering colinfo.ColumnOrdering) error
- func (f *DiskBackedRowContainer) Sort(ctx context.Context)
- func (f *DiskBackedRowContainer) SpillToDisk(ctx context.Context) error
- func (f *DiskBackedRowContainer) Spilled() bool
- func (f *DiskBackedRowContainer) UnsafeReset(ctx context.Context) error
- func (f *DiskBackedRowContainer) UsingDisk() bool
type DiskRowContainer
- func MakeDiskRowContainer(diskMonitor *mon.BytesMonitor, types []*types.T, ...) DiskRowContainer
- func (d *DiskRowContainer) AddRow(ctx context.Context, row rowenc.EncDatumRow) error
- func (d *DiskRowContainer) AddRowWithDeDup(ctx context.Context, row rowenc.EncDatumRow) (int, error)
- func (d *DiskRowContainer) Close(ctx context.Context)
- func (d *DiskRowContainer) DoDeDuplicate()
- func (d *DiskRowContainer) InitTopK()
- func (d *DiskRowContainer) Len() int
- func (d *DiskRowContainer) MaybeReplaceMax(ctx context.Context, row rowenc.EncDatumRow) error
- func (d *DiskRowContainer) MeanEncodedRowBytes() int
- func (d *DiskRowContainer) NewFinalIterator(ctx context.Context) RowIterator
- func (d *DiskRowContainer) NewIterator(ctx context.Context) RowIterator
- func (d *DiskRowContainer) Reorder(ctx context.Context, ordering colinfo.ColumnOrdering) error
- func (d *DiskRowContainer) Sort(context.Context)
- func (d *DiskRowContainer) UnsafeReset(ctx context.Context) error
type HashDiskBackedRowContainer
- func NewHashDiskBackedRowContainer(evalCtx *tree.EvalContext, memoryMonitor *mon.BytesMonitor, ...) *HashDiskBackedRowContainer
- func (h *HashDiskBackedRowContainer) AddRow(ctx context.Context, row rowenc.EncDatumRow) error
- func (h *HashDiskBackedRowContainer) Close(ctx context.Context)
- func (h *HashDiskBackedRowContainer) Init(ctx context.Context, shouldMark bool, types []*types.T, storedEqCols columns, ...) error
- func (h *HashDiskBackedRowContainer) IsEmpty() bool
- func (h *HashDiskBackedRowContainer) NewAllRowsIterator(ctx context.Context) (*AllRowsIterator, error)
- func (h *HashDiskBackedRowContainer) NewBucketIterator(ctx context.Context, row rowenc.EncDatumRow, probeEqCols columns) (RowMarkerIterator, error)
- func (h *HashDiskBackedRowContainer) NewUnmarkedIterator(ctx context.Context) RowIterator
- func (h *HashDiskBackedRowContainer) ReserveMarkMemoryMaybe(ctx context.Context) error
- func (h *HashDiskBackedRowContainer) Sort(ctx context.Context)
- func (h *HashDiskBackedRowContainer) SpillToDisk(ctx context.Context) error
- func (h *HashDiskBackedRowContainer) UsingDisk() bool
type HashDiskRowContainer
- func MakeHashDiskRowContainer(diskMonitor *mon.BytesMonitor, e diskmap.Factory) HashDiskRowContainer
- func (h *HashDiskRowContainer) AddRow(ctx context.Context, row rowenc.EncDatumRow) error
- func (h *HashDiskRowContainer) Init(_ context.Context, shouldMark bool, typs []*types.T, storedEqCols columns, ...) error
- func (h *HashDiskRowContainer) IsEmpty() bool
- func (h *HashDiskRowContainer) NewBucketIterator(ctx context.Context, row rowenc.EncDatumRow, probeEqCols columns) (RowMarkerIterator, error)
- func (h *HashDiskRowContainer) NewUnmarkedIterator(ctx context.Context) RowIterator
type HashMemRowContainer
- func MakeHashMemRowContainer(evalCtx *tree.EvalContext, memMonitor *mon.BytesMonitor, typs []*types.T, ...) HashMemRowContainer
- func (h *HashMemRowContainer) AddRow(ctx context.Context, row rowenc.EncDatumRow) error
- func (h *HashMemRowContainer) Close(ctx context.Context)
- func (h *HashMemRowContainer) Init(_ context.Context, shouldMark bool, typs []*types.T, storedEqCols columns, ...) error
- func (h *HashMemRowContainer) IsEmpty() bool
- func (h *HashMemRowContainer) NewBucketIterator(ctx context.Context, row rowenc.EncDatumRow, probeEqCols columns) (RowMarkerIterator, error)
- func (h *HashMemRowContainer) NewUnmarkedIterator(ctx context.Context) RowIterator
- func (h *HashMemRowContainer) ReserveMarkMemoryMaybe(ctx context.Context) error
type HashRowContainer
type IndexedRow
- func (ir IndexedRow) GetDatum(colIdx int) (tree.Datum, error)
- func (ir IndexedRow) GetDatums(startColIdx, endColIdx int) (tree.Datums, error)
- func (ir IndexedRow) GetIdx() int
type IndexedRowContainer
type MemRowContainer
- func (mc *MemRowContainer) AddRow(ctx context.Context, row rowenc.EncDatumRow) error
- func (mc *MemRowContainer) EncRow(idx int) rowenc.EncDatumRow
- func (mc *MemRowContainer) GetRow(ctx context.Context, pos int) (tree.IndexedRow, error)
- func (mc *MemRowContainer) Init(ordering colinfo.ColumnOrdering, types []*types.T, evalCtx *tree.EvalContext)
- func (mc *MemRowContainer) InitTopK()
- func (mc *MemRowContainer) InitWithMon(ordering colinfo.ColumnOrdering, types []*types.T, evalCtx *tree.EvalContext, ...)
- func (mc *MemRowContainer) Less(i, j int) bool
- func (mc *MemRowContainer) MaybeReplaceMax(ctx context.Context, row rowenc.EncDatumRow) error
- func (mc *MemRowContainer) NewFinalIterator(ctx context.Context) RowIterator
- func (mc *MemRowContainer) NewIterator(_ context.Context) RowIterator
- func (mc *MemRowContainer) Pop() interface{}
- func (mc *MemRowContainer) Push(_ interface{})
- func (mc *MemRowContainer) Reorder(_ context.Context, ordering colinfo.ColumnOrdering) error
- func (mc *MemRowContainer) Sort(ctx context.Context)
type ReorderableRowContainer
type RowContainer
- func NewRowContainer(acc mon.BoundAccount, ti colinfo.ColTypeInfo) *RowContainer
- func NewRowContainerWithCapacity(acc mon.BoundAccount, ti colinfo.ColTypeInfo, rowCapacity int) *RowContainer
- func (c *RowContainer) AddRow(ctx context.Context, row tree.Datums) (tree.Datums, error)
- func (c *RowContainer) At(i int) tree.Datums
- func (c *RowContainer) Clear(ctx context.Context)
- func (c *RowContainer) Close(ctx context.Context)
- func (c *RowContainer) Init(acc mon.BoundAccount, ti colinfo.ColTypeInfo, rowCapacity int)
- func (c *RowContainer) Len() int
- func (c *RowContainer) NumCols() int
- func (c *RowContainer) PopFirst(ctx context.Context)
- func (c *RowContainer) Replace(ctx context.Context, i int, newRow tree.Datums) error
- func (c *RowContainer) Swap(i, j int)
- func (c *RowContainer) UnsafeReset(ctx context.Context) error
type RowIterator
type RowMarkerIterator
type SortableRowContainer

Constants ¶

This section is empty.

Variables ¶

This section is empty.

Functions ¶

This section is empty.

Types ¶

type AllRowsIterator ¶

type AllRowsIterator struct {
	RowIterator
	// contains filtered or unexported fields
}

AllRowsIterator iterates over all rows in HashDiskBackedRowContainer which should be initialized to not do marking. This iterator will be recreated in-place if the container spills to disk.

func (*AllRowsIterator) Close ¶

func (i *AllRowsIterator) Close()

Close implements RowIterator interface.

type DeDupingRowContainer ¶

type DeDupingRowContainer interface {
	// AddRowWithDeDup adds the given row if it is not already present in the
	// container. It returns the row's dense index, which is assigned when the
	// row is first added.
	AddRowWithDeDup(context.Context, rowenc.EncDatumRow) (int, error)
	// UnsafeReset resets the container, allowing for reuse. It renders all
	// previously allocated rows unsafe.
	UnsafeReset(context.Context) error
	// Close frees up resources held by the container.
	Close(context.Context)
}

DeDupingRowContainer is a container that de-duplicates rows added to the container, and assigns them a dense index starting from 0, representing when that row was first added. It only supports a configuration where all the columns are encoded into the key -- relaxing this is not hard, but is not worth adding the code without a use for it.

type DiskBackedIndexedRowContainer ¶

type DiskBackedIndexedRowContainer struct {
	*DiskBackedRowContainer

	// DisableCache is intended for testing only. It can be set to true to
	// disable reading and writing from the row cache.
	DisableCache bool
	// contains filtered or unexported fields
}

DiskBackedIndexedRowContainer is a wrapper around DiskBackedRowContainer that adds an index to each row added in the order of addition of those rows by storing an extra int column at the end of each row. These indices can be thought of as ordinals of the rows.

Note: although DiskRowContainer appends unique rowIDs to the keys that the rows are put at, MemRowContainer doesn't do something like that, so the code that utilizes internal rowIDs of DiskRowContainer ends up being worse than having this specialized container.

func NewDiskBackedIndexedRowContainer ¶

func NewDiskBackedIndexedRowContainer(
	ordering colinfo.ColumnOrdering,
	typs []*types.T,
	evalCtx *tree.EvalContext,
	engine diskmap.Factory,
	memoryMonitor *mon.BytesMonitor,
	diskMonitor *mon.BytesMonitor,
) *DiskBackedIndexedRowContainer

NewDiskBackedIndexedRowContainer creates a DiskBackedIndexedRowContainer with the given engine as the underlying store that rows are stored on when it spills to disk. Arguments:

ordering is the output ordering; the order in which rows should be sorted.
typs is the schema of rows that will be added to this container.
evalCtx defines the context in which to evaluate comparisons, only used when storing rows in memory.
engine is the underlying store that rows are stored on when the container spills to disk.
memoryMonitor is used to monitor this container's memory usage.
diskMonitor is used to monitor this container's disk usage.

func (*DiskBackedIndexedRowContainer) AddRow ¶

func (f *DiskBackedIndexedRowContainer) AddRow(ctx context.Context, row rowenc.EncDatumRow) error

AddRow implements SortableRowContainer.

func (*DiskBackedIndexedRowContainer) Close ¶

func (f *DiskBackedIndexedRowContainer) Close(ctx context.Context)

Close implements SortableRowContainer.

func (*DiskBackedIndexedRowContainer) GetRow ¶

func (f *DiskBackedIndexedRowContainer) GetRow(
	ctx context.Context, pos int,
) (tree.IndexedRow, error)

GetRow implements tree.IndexedRows.

Getting a row by index is fast from an in-memory row container but is a lot slower from a disk-backed one. To mitigate the impact, we maintain a cache of tree.IndexedRow values and store a disk iterator along with the index of the row it currently points at.

func (*DiskBackedIndexedRowContainer) Reorder ¶

func (f *DiskBackedIndexedRowContainer) Reorder(
	ctx context.Context, ordering colinfo.ColumnOrdering,
) error

Reorder implements ReorderableRowContainer.

func (*DiskBackedIndexedRowContainer) UnsafeReset ¶

func (f *DiskBackedIndexedRowContainer) UnsafeReset(ctx context.Context) error

UnsafeReset resets the underlying container (if it is using disk, it will be reset to using memory).

type DiskBackedNumberedRowContainer ¶

type DiskBackedNumberedRowContainer struct {
	DisableCache bool
	// contains filtered or unexported fields
}

DiskBackedNumberedRowContainer is a container that stores a map from idx => row, where idx is a 0-based dense numbering. Optionally, if deDup is true, it can de-duplicate the rows before assigning a number. It spills to disk if needed.

func NewDiskBackedNumberedRowContainer ¶

func NewDiskBackedNumberedRowContainer(
	deDup bool,
	types []*types.T,
	evalCtx *tree.EvalContext,
	engine diskmap.Factory,
	memoryMonitor *mon.BytesMonitor,
	diskMonitor *mon.BytesMonitor,
) *DiskBackedNumberedRowContainer

NewDiskBackedNumberedRowContainer creates a DiskBackedNumberedRowContainer.

Arguments:

deDup is true if it should de-duplicate.
types is the schema of rows that will be added to this container.
evalCtx defines the context.
engine is the underlying store that rows are stored on when the container spills to disk.
memoryMonitor is used to monitor this container's memory usage.
diskMonitor is used to monitor this container's disk usage.

func (*DiskBackedNumberedRowContainer) AddRow ¶

func (d *DiskBackedNumberedRowContainer) AddRow(
	ctx context.Context, row rowenc.EncDatumRow,
) (int, error)

AddRow tries to add a row. It returns the position of the row in the container.

func (*DiskBackedNumberedRowContainer) Close ¶

func (d *DiskBackedNumberedRowContainer) Close(ctx context.Context)

Close closes the container.

func (*DiskBackedNumberedRowContainer) GetRow ¶

func (d *DiskBackedNumberedRowContainer) GetRow(
	ctx context.Context, idx int, skip bool,
) (rowenc.EncDatumRow, error)

GetRow returns a row with the given index. If skip is true the row is not actually read and just indicates a read that is being skipped. It is used to maintain synchronization with the future, since the caller can skip accesses for semi-joins and anti-joins.

func (*DiskBackedNumberedRowContainer) SetupForRead ¶

func (d *DiskBackedNumberedRowContainer) SetupForRead(ctx context.Context, accesses [][]int)

SetupForRead must be called before calling GetRow(). No more AddRow() calls are permitted (before UnsafeReset()). See the comment for NumberedDiskRowIterator for how we use the future accesses.

func (*DiskBackedNumberedRowContainer) Spilled ¶

func (d *DiskBackedNumberedRowContainer) Spilled() bool

Spilled returns whether or not the primary container spilled to disk in its lifetime.

func (*DiskBackedNumberedRowContainer) UnsafeReset ¶

func (d *DiskBackedNumberedRowContainer) UnsafeReset(ctx context.Context) error

UnsafeReset resets this container to be reused.

func (*DiskBackedNumberedRowContainer) UsingDisk ¶

func (d *DiskBackedNumberedRowContainer) UsingDisk() bool

UsingDisk returns whether the primary container is using disk.

type DiskBackedRowContainer ¶

type DiskBackedRowContainer struct {
	// contains filtered or unexported fields
}

DiskBackedRowContainer is a ReorderableRowContainer that uses a MemRowContainer to store rows and spills to disk automatically if memory usage exceeds a given budget.

func (*DiskBackedRowContainer) AddRow ¶

func (f *DiskBackedRowContainer) AddRow(ctx context.Context, row rowenc.EncDatumRow) error

AddRow is part of the SortableRowContainer interface.

func (*DiskBackedRowContainer) AddRowWithDeDup ¶

func (f *DiskBackedRowContainer) AddRowWithDeDup(
	ctx context.Context, row rowenc.EncDatumRow,
) (int, error)

AddRowWithDeDup is part of the DeDupingRowContainer interface.

func (*DiskBackedRowContainer) Close ¶

func (f *DiskBackedRowContainer) Close(ctx context.Context)

Close is part of the SortableRowContainer interface.

func (*DiskBackedRowContainer) DoDeDuplicate ¶

func (f *DiskBackedRowContainer) DoDeDuplicate()

DoDeDuplicate causes DiskBackedRowContainer to behave as an implementation of DeDupingRowContainer. It should not be mixed with calls to AddRow().

The rows are de-duplicated along the columns in the ordering (the values on those columns are the key). Only the first row with a given key will be stored. The index returned by AddRowWithDeDup() is a dense index starting from 0, representing when that key was first added. This feature does not combine with Sort(), Reorder(), etc., and is only to be used for assigning these dense indexes.

The main reason to add this to DiskBackedRowContainer is to avoid significant code duplication in constructing another row container.

func (*DiskBackedRowContainer) Init ¶

func (f *DiskBackedRowContainer) Init(
	ordering colinfo.ColumnOrdering,
	types []*types.T,
	evalCtx *tree.EvalContext,
	engine diskmap.Factory,
	memoryMonitor *mon.BytesMonitor,
	diskMonitor *mon.BytesMonitor,
)

Init initializes a DiskBackedRowContainer. Arguments:

ordering is the output ordering; the order in which rows should be sorted.
types is the schema of rows that will be added to this container.
evalCtx defines the context in which to evaluate comparisons, only used when storing rows in memory.
engine is the store used for rows when spilling to disk.
memoryMonitor is used to monitor the DiskBackedRowContainer's memory usage. If this monitor denies an allocation, the DiskBackedRowContainer will spill to disk.
diskMonitor is used to monitor the DiskBackedRowContainer's disk usage if and when it spills to disk.

func (*DiskBackedRowContainer) InitTopK ¶

func (f *DiskBackedRowContainer) InitTopK()

InitTopK is part of the SortableRowContainer interface.

func (*DiskBackedRowContainer) Len ¶

func (f *DiskBackedRowContainer) Len() int

Len is part of the SortableRowContainer interface.

func (*DiskBackedRowContainer) MaybeReplaceMax ¶

func (f *DiskBackedRowContainer) MaybeReplaceMax(
	ctx context.Context, row rowenc.EncDatumRow,
) error

MaybeReplaceMax is part of the SortableRowContainer interface.

func (*DiskBackedRowContainer) NewFinalIterator ¶

func (f *DiskBackedRowContainer) NewFinalIterator(ctx context.Context) RowIterator

NewFinalIterator is part of the SortableRowContainer interface.

func (*DiskBackedRowContainer) NewIterator ¶

func (f *DiskBackedRowContainer) NewIterator(ctx context.Context) RowIterator

NewIterator is part of the SortableRowContainer interface.

func (*DiskBackedRowContainer) Reorder ¶

func (f *DiskBackedRowContainer) Reorder(
	ctx context.Context, ordering colinfo.ColumnOrdering,
) error

Reorder implements ReorderableRowContainer.

func (*DiskBackedRowContainer) Sort ¶

func (f *DiskBackedRowContainer) Sort(ctx context.Context)

Sort is part of the SortableRowContainer interface.

func (*DiskBackedRowContainer) SpillToDisk ¶

func (f *DiskBackedRowContainer) SpillToDisk(ctx context.Context) error

SpillToDisk creates a disk row container, injects all the data from the in-memory container into it, and clears the in-memory one afterwards.

func (*DiskBackedRowContainer) Spilled ¶

func (f *DiskBackedRowContainer) Spilled() bool

Spilled returns whether or not the DiskBackedRowContainer spilled to disk in its lifetime.

func (*DiskBackedRowContainer) UnsafeReset ¶

func (f *DiskBackedRowContainer) UnsafeReset(ctx context.Context) error

UnsafeReset resets the container for reuse. The DiskBackedRowContainer will reset to use memory if it is using disk.

func (*DiskBackedRowContainer) UsingDisk ¶

func (f *DiskBackedRowContainer) UsingDisk() bool

UsingDisk returns whether or not the DiskBackedRowContainer is currently using disk.

type DiskRowContainer ¶

type DiskRowContainer struct {
	// contains filtered or unexported fields
}

DiskRowContainer is a SortableRowContainer that stores rows on disk according to the ordering specified in DiskRowContainer.ordering. The underlying store is a SortedDiskMap so the sorting itself is delegated. Use an iterator created through NewIterator() to read the rows in sorted order.

func MakeDiskRowContainer ¶

func MakeDiskRowContainer(
	diskMonitor *mon.BytesMonitor,
	types []*types.T,
	ordering colinfo.ColumnOrdering,
	e diskmap.Factory,
) DiskRowContainer

MakeDiskRowContainer creates a DiskRowContainer with the given engine as the underlying store that rows are stored on. Arguments:

diskMonitor is used to monitor this DiskRowContainer's disk usage.
types is the schema of rows that will be added to this container.
ordering is the output ordering; the order in which rows should be sorted.
e is the underlying store that rows are stored on.

func (*DiskRowContainer) AddRow ¶

func (d *DiskRowContainer) AddRow(ctx context.Context, row rowenc.EncDatumRow) error

AddRow is part of the SortableRowContainer interface.

It is additionally used in de-duping mode by DiskBackedRowContainer when switching from a memory container to this disk container, since it is adding rows that are already de-duped. Once it has added all the already de-duped rows, it should switch to using AddRowWithDeDup() and never call AddRow() again.

Note: if key calculation changes, computeKey() of hashMemRowIterator should be changed accordingly.

func (*DiskRowContainer) AddRowWithDeDup ¶

func (d *DiskRowContainer) AddRowWithDeDup(
	ctx context.Context, row rowenc.EncDatumRow,
) (int, error)

AddRowWithDeDup is part of the DeDupingRowContainer interface.

func (*DiskRowContainer) Close ¶

func (d *DiskRowContainer) Close(ctx context.Context)

Close is part of the SortableRowContainer interface.

func (*DiskRowContainer) DoDeDuplicate ¶

func (d *DiskRowContainer) DoDeDuplicate()

DoDeDuplicate causes DiskRowContainer to behave as an implementation of DeDupingRowContainer. It should not be mixed with calls to AddRow() (except when the AddRow() calls add rows that are already de-duplicated). It de-duplicates the keys such that only the first row with the given key will be stored. The index returned by AddRowWithDeDup() is a dense index starting from 0, representing when that key was first added. This feature does not combine with Sort(), Reorder(), etc., and is only to be used for assigning these dense indexes. The main reason to add this to DiskBackedRowContainer is to avoid significant code duplication in constructing another row container.

func (*DiskRowContainer) InitTopK ¶

func (d *DiskRowContainer) InitTopK()

InitTopK limits iterators to read the first k rows.

func (*DiskRowContainer) Len ¶

func (d *DiskRowContainer) Len() int

Len is part of the SortableRowContainer interface.

func (*DiskRowContainer) MaybeReplaceMax ¶

func (d *DiskRowContainer) MaybeReplaceMax(ctx context.Context, row rowenc.EncDatumRow) error

MaybeReplaceMax adds row to the DiskRowContainer. The SortedDiskMap will sort this row into the top k if applicable.

func (*DiskRowContainer) MeanEncodedRowBytes ¶

func (d *DiskRowContainer) MeanEncodedRowBytes() int

MeanEncodedRowBytes returns the mean bytes consumed by an encoded row stored in this container.

func (*DiskRowContainer) NewFinalIterator ¶

func (d *DiskRowContainer) NewFinalIterator(ctx context.Context) RowIterator

NewFinalIterator returns an iterator that reads rows exactly once throughout the lifetime of a DiskRowContainer. Rows are not actually discarded from the DiskRowContainer, but the lastReadKey is kept track of in order to serve as the start key for future diskRowFinalIterators. NOTE: Don't use NewFinalIterator if you passed in an ordering for the rows and will be adding rows between iterations. New rows could sort before the current row.

func (*DiskRowContainer) NewIterator ¶

func (d *DiskRowContainer) NewIterator(ctx context.Context) RowIterator

NewIterator is part of the SortableRowContainer interface.

func (*DiskRowContainer) Reorder ¶

func (d *DiskRowContainer) Reorder(ctx context.Context, ordering colinfo.ColumnOrdering) error

Reorder implements ReorderableRowContainer. It creates a new DiskRowContainer with the requested ordering and adds the rows one by one from the current DiskRowContainer; the latter is closed at the end.

func (*DiskRowContainer) Sort ¶

func (d *DiskRowContainer) Sort(context.Context)

Sort is a noop because the use of a SortedDiskMap as the underlying store keeps the rows in sorted order.

func (*DiskRowContainer) UnsafeReset ¶

func (d *DiskRowContainer) UnsafeReset(ctx context.Context) error

UnsafeReset is part of the SortableRowContainer interface.

type HashDiskBackedRowContainer ¶

type HashDiskBackedRowContainer struct {
	// contains filtered or unexported fields
}

HashDiskBackedRowContainer is a HashRowContainer that uses a HashMemRowContainer to store rows and spills to disk automatically if memory usage exceeds a given budget. When spilled to disk, the rows are stored with an extra boolean column to keep track of that row's mark.

func NewHashDiskBackedRowContainer ¶

func NewHashDiskBackedRowContainer(
	evalCtx *tree.EvalContext,
	memoryMonitor *mon.BytesMonitor,
	diskMonitor *mon.BytesMonitor,
	engine diskmap.Factory,
) *HashDiskBackedRowContainer

NewHashDiskBackedRowContainer makes a HashDiskBackedRowContainer.

func (*HashDiskBackedRowContainer) AddRow ¶

func (h *HashDiskBackedRowContainer) AddRow(ctx context.Context, row rowenc.EncDatumRow) error

AddRow adds a row to the HashDiskBackedRowContainer. This row is unmarked by default.

func (*HashDiskBackedRowContainer) Close ¶

func (h *HashDiskBackedRowContainer) Close(ctx context.Context)

Close implements the HashRowContainer interface.

func (*HashDiskBackedRowContainer) Init ¶

func (h *HashDiskBackedRowContainer) Init(
	ctx context.Context, shouldMark bool, types []*types.T, storedEqCols columns, encodeNull bool,
) error

Init implements the HashRowContainer interface.

func (*HashDiskBackedRowContainer) IsEmpty ¶

func (h *HashDiskBackedRowContainer) IsEmpty() bool

IsEmpty implements the HashRowContainer interface.

func (*HashDiskBackedRowContainer) NewAllRowsIterator ¶

func (h *HashDiskBackedRowContainer) NewAllRowsIterator(
	ctx context.Context,
) (*AllRowsIterator, error)

NewAllRowsIterator creates an AllRowsIterator that can iterate over all rows (equivalent to an unmarked iterator when the container doesn't do marking) and that will be recreated if the container spills to disk.

func (*HashDiskBackedRowContainer) NewBucketIterator ¶

func (h *HashDiskBackedRowContainer) NewBucketIterator(
	ctx context.Context, row rowenc.EncDatumRow, probeEqCols columns,
) (RowMarkerIterator, error)

NewBucketIterator implements the HashRowContainer interface.

func (*HashDiskBackedRowContainer) NewUnmarkedIterator ¶

func (h *HashDiskBackedRowContainer) NewUnmarkedIterator(ctx context.Context) RowIterator

NewUnmarkedIterator implements the HashRowContainer interface.

func (*HashDiskBackedRowContainer) ReserveMarkMemoryMaybe ¶

func (h *HashDiskBackedRowContainer) ReserveMarkMemoryMaybe(ctx context.Context) error

ReserveMarkMemoryMaybe attempts to reserve memory for marks if we're using an in-memory container at the moment. If there is not enough memory left, it spills to disk.

func (*HashDiskBackedRowContainer) Sort ¶

func (h *HashDiskBackedRowContainer) Sort(ctx context.Context)

Sort sorts the underlying row container based on stored equality columns which forces all rows from the same hash bucket to be contiguous.

func (*HashDiskBackedRowContainer) SpillToDisk ¶

func (h *HashDiskBackedRowContainer) SpillToDisk(ctx context.Context) error

SpillToDisk creates a disk row container, injects all the data from the in-memory container into it, and clears the in-memory one afterwards.

func (*HashDiskBackedRowContainer) UsingDisk ¶

func (h *HashDiskBackedRowContainer) UsingDisk() bool

UsingDisk returns whether or not the HashDiskBackedRowContainer is currently using disk.

type HashDiskRowContainer ¶

type HashDiskRowContainer struct {
	DiskRowContainer
	// contains filtered or unexported fields
}

HashDiskRowContainer is an on-disk implementation of a HashRowContainer. The rows are stored in an underlying DiskRowContainer with an extra boolean column to keep track of that row's mark.

func MakeHashDiskRowContainer ¶

func MakeHashDiskRowContainer(
	diskMonitor *mon.BytesMonitor, e diskmap.Factory,
) HashDiskRowContainer

MakeHashDiskRowContainer creates a HashDiskRowContainer with the given engine as the underlying store that rows are stored on. Whether the HashDiskRowContainer sets itself up to mark rows is specified by the shouldMark argument passed to Init.

func (*HashDiskRowContainer) AddRow ¶

func (h *HashDiskRowContainer) AddRow(ctx context.Context, row rowenc.EncDatumRow) error

AddRow adds a row to the HashDiskRowContainer. This row is unmarked by default.

func (*HashDiskRowContainer) Init ¶

func (h *HashDiskRowContainer) Init(
	_ context.Context, shouldMark bool, typs []*types.T, storedEqCols columns, encodeNull bool,
) error

Init implements the HashRowContainer interface.

func (*HashDiskRowContainer) IsEmpty ¶

func (h *HashDiskRowContainer) IsEmpty() bool

IsEmpty implements the HashRowContainer interface.

func (*HashDiskRowContainer) NewBucketIterator ¶

func (h *HashDiskRowContainer) NewBucketIterator(
	ctx context.Context, row rowenc.EncDatumRow, probeEqCols columns,
) (RowMarkerIterator, error)

NewBucketIterator implements the HashRowContainer interface.

func (*HashDiskRowContainer) NewUnmarkedIterator ¶

func (h *HashDiskRowContainer) NewUnmarkedIterator(ctx context.Context) RowIterator

NewUnmarkedIterator implements the HashRowContainer interface.

type HashMemRowContainer ¶

type HashMemRowContainer struct {
	*MemRowContainer
	// contains filtered or unexported fields
}

HashMemRowContainer is an in-memory implementation of a HashRowContainer. The rows are stored in an underlying MemRowContainer and an accompanying map stores the mapping from equality column encodings to indices in the MemRowContainer corresponding to matching rows. NOTE: Once a row is marked, adding more rows to the HashMemRowContainer results in undefined behavior. It is not necessary to do otherwise for the current usage of HashMemRowContainer.

func MakeHashMemRowContainer ¶

func MakeHashMemRowContainer(
	evalCtx *tree.EvalContext, memMonitor *mon.BytesMonitor, typs []*types.T, storedEqCols columns,
) HashMemRowContainer

MakeHashMemRowContainer creates a HashMemRowContainer. This rowContainer must still be Close()d by the caller.

func (*HashMemRowContainer) AddRow ¶

func (h *HashMemRowContainer) AddRow(ctx context.Context, row rowenc.EncDatumRow) error

AddRow adds a row to the HashMemRowContainer. This row is unmarked by default.

func (*HashMemRowContainer) Close ¶

func (h *HashMemRowContainer) Close(ctx context.Context)

Close implements the HashRowContainer interface.

func (*HashMemRowContainer) Init ¶

func (h *HashMemRowContainer) Init(
	_ context.Context, shouldMark bool, typs []*types.T, storedEqCols columns, encodeNull bool,
) error

Init implements the HashRowContainer interface. typs is ignored because the schema is inferred from the MemRowContainer.

func (*HashMemRowContainer) IsEmpty ¶

func (h *HashMemRowContainer) IsEmpty() bool

IsEmpty implements the HashRowContainer interface.

func (*HashMemRowContainer) NewBucketIterator ¶

func (h *HashMemRowContainer) NewBucketIterator(
	ctx context.Context, row rowenc.EncDatumRow, probeEqCols columns,
) (RowMarkerIterator, error)

NewBucketIterator implements the HashRowContainer interface.

func (*HashMemRowContainer) NewUnmarkedIterator ¶

func (h *HashMemRowContainer) NewUnmarkedIterator(ctx context.Context) RowIterator

NewUnmarkedIterator implements the HashRowContainer interface.

func (*HashMemRowContainer) ReserveMarkMemoryMaybe ¶

func (h *HashMemRowContainer) ReserveMarkMemoryMaybe(ctx context.Context) error

ReserveMarkMemoryMaybe is a utility function to grow the HashMemRowContainer's memory account by the memory needed to mark all rows. It is a noop if h.markMemoryReserved is true.

type HashRowContainer ¶

// HashRowContainer stores rows bucketed by an encoding of their equality
// columns. Stored rows can be probed for the bucket matching a given row and,
// optionally, marked so that the unmarked rows can be iterated over later.
type HashRowContainer interface {
	// Init initializes the HashRowContainer with the given equality columns.
	//	- shouldMark specifies whether the caller cares about marking rows. If
	//	  not, the HashRowContainer will not perform any row marking logic. This
	//	  is meant to optimize space usage and runtime.
	//	- types is the schema of rows that will be added to this container.
	//	- storedEqCols are the equality columns of rows stored in this
	//	  container, i.e. when adding a row, the columns specified by
	//	  storedEqCols are used to get the bucket that the row should be added
	//	  to.
	//	- encodeNull indicates whether rows with NULL equality columns should be
	//	  stored or skipped.
	Init(
		ctx context.Context, shouldMark bool, types []*types.T, storedEqCols columns, encodeNull bool,
	) error
	// AddRow adds a row to the container. The columns passed to Init as
	// storedEqCols determine the bucket that the row is added to.
	AddRow(context.Context, rowenc.EncDatumRow) error
	// IsEmpty returns true if no rows have been added to the container so far.
	IsEmpty() bool

	// NewBucketIterator returns a RowMarkerIterator that iterates over a bucket
	// of rows that match the given row on equality columns. This iterator can
	// also be used to mark rows.
	// Rows are marked because of the use of this interface by the hashJoiner.
	// Given a row, the hashJoiner does not necessarily want to emit all rows
	// that match on equality columns. There is an additional `ON` clause that
	// specifies an arbitrary expression that matching rows must pass to be
	// emitted. For full/outer joins, this is tracked through marking rows if
	// they match and then iterating over all unmarked rows to emit those that
	// did not match.
	//	- probeEqCols are the equality columns of the given row that are used to
	//	  get the bucket of matching rows.
	NewBucketIterator(
		ctx context.Context, row rowenc.EncDatumRow, probeEqCols columns,
	) (RowMarkerIterator, error)

	// NewUnmarkedIterator returns a RowIterator that iterates over unmarked
	// rows. If shouldMark was false in Init(), this iterator iterates over all
	// rows.
	NewUnmarkedIterator(context.Context) RowIterator

	// Close frees up resources held by the HashRowContainer.
	Close(context.Context)
}

HashRowContainer is a container used to store rows according to an encoding of given equality columns. The stored rows can then be probed to return a bucket of matching rows. Additionally, each stored row can be marked and all rows that are unmarked can be iterated over. An example of where this is useful is in full/outer joins. The caller can mark all matched rows and iterate over the unmarked rows to produce a result.

type IndexedRow ¶

// IndexedRow pairs a row with a corresponding index.
type IndexedRow struct {
	// Idx is the index that corresponds to Row.
	Idx int
	// Row is the row itself.
	Row rowenc.EncDatumRow
}

IndexedRow is a row with a corresponding index.

func (IndexedRow) GetDatum ¶

func (ir IndexedRow) GetDatum(colIdx int) (tree.Datum, error)

GetDatum implements tree.IndexedRow interface.

func (IndexedRow) GetDatums ¶

func (ir IndexedRow) GetDatums(startColIdx, endColIdx int) (tree.Datums, error)

GetDatums implements tree.IndexedRow interface.

func (IndexedRow) GetIdx ¶

func (ir IndexedRow) GetIdx() int

GetIdx implements tree.IndexedRow interface.

type IndexedRowContainer ¶

// IndexedRowContainer is a ReorderableRowContainer that additionally allows
// retrieving a row at a particular index.
type IndexedRowContainer interface {
	ReorderableRowContainer

	// GetRow returns the row at the given index, or an error.
	GetRow(ctx context.Context, idx int) (tree.IndexedRow, error)
}

IndexedRowContainer is a ReorderableRowContainer which also implements tree.IndexedRows. It allows retrieving a row at a particular index.

type MemRowContainer ¶

// MemRowContainer wraps RowContainer to provide more functionality, especially
// around converting to/from EncDatumRows and facilitating sorting.
type MemRowContainer struct {
	// RowContainer is the embedded container being wrapped.
	RowContainer
	// contains filtered or unexported fields
}

MemRowContainer is the wrapper around rowcontainer.RowContainer that provides more functionality, especially around converting to/from EncDatumRows and facilitating sorting.

func (*MemRowContainer) AddRow ¶

func (mc *MemRowContainer) AddRow(ctx context.Context, row rowenc.EncDatumRow) error

AddRow adds a row to the container.

func (*MemRowContainer) EncRow ¶

func (mc *MemRowContainer) EncRow(idx int) rowenc.EncDatumRow

EncRow returns the idx-th row as an EncDatumRow. The slice itself is reused so it is only valid until the next call to EncRow.

func (*MemRowContainer) GetRow ¶

func (mc *MemRowContainer) GetRow(ctx context.Context, pos int) (tree.IndexedRow, error)

GetRow implements IndexedRowContainer.

func (*MemRowContainer) Init ¶

func (mc *MemRowContainer) Init(
	ordering colinfo.ColumnOrdering, types []*types.T, evalCtx *tree.EvalContext,
)

Init initializes the MemRowContainer. The MemRowContainer uses evalCtx.Mon to track memory usage.

func (*MemRowContainer) InitTopK ¶

func (mc *MemRowContainer) InitTopK()

InitTopK rearranges the rows in the MemRowContainer into a Max-Heap.

func (*MemRowContainer) InitWithMon ¶

func (mc *MemRowContainer) InitWithMon(
	ordering colinfo.ColumnOrdering,
	types []*types.T,
	evalCtx *tree.EvalContext,
	mon *mon.BytesMonitor,
)

InitWithMon initializes the MemRowContainer with an explicit monitor. Only use this if the default MemRowContainer.Init() function is insufficient.

func (*MemRowContainer) Less ¶

func (mc *MemRowContainer) Less(i, j int) bool

Less is part of heap.Interface and is only meant to be used internally.

func (*MemRowContainer) MaybeReplaceMax ¶

func (mc *MemRowContainer) MaybeReplaceMax(ctx context.Context, row rowenc.EncDatumRow) error

MaybeReplaceMax replaces the maximum element with the given row, if it is smaller. Assumes InitTopK was called.

func (*MemRowContainer) NewFinalIterator ¶

func (mc *MemRowContainer) NewFinalIterator(ctx context.Context) RowIterator

NewFinalIterator returns an iterator that can be used to iterate over a MemRowContainer. Note that this iterator doesn't iterate over a snapshot of MemRowContainer and that it deletes rows as soon as they are iterated over.

func (*MemRowContainer) NewIterator ¶

func (mc *MemRowContainer) NewIterator(_ context.Context) RowIterator

NewIterator returns an iterator that can be used to iterate over a MemRowContainer. Note that this iterator doesn't iterate over a snapshot of MemRowContainer.

func (*MemRowContainer) Pop ¶

func (mc *MemRowContainer) Pop() interface{}

Pop is part of heap.Interface.

func (*MemRowContainer) Push ¶

func (mc *MemRowContainer) Push(_ interface{})

Push is part of heap.Interface.

func (*MemRowContainer) Reorder ¶

func (mc *MemRowContainer) Reorder(_ context.Context, ordering colinfo.ColumnOrdering) error

Reorder implements ReorderableRowContainer. We don't need to create a new MemRowContainer and can just change the ordering on-the-fly.

func (*MemRowContainer) Sort ¶

func (mc *MemRowContainer) Sort(ctx context.Context)

Sort is part of the SortableRowContainer interface.

type ReorderableRowContainer ¶

// ReorderableRowContainer is a SortableRowContainer that can change the
// ordering on which the rows are sorted.
type ReorderableRowContainer interface {
	SortableRowContainer

	// Reorder changes the ordering on which the rows are sorted. In order for
	// the new ordering to take effect, Sort() must be called. It returns an
	// error if one occurs.
	Reorder(context.Context, colinfo.ColumnOrdering) error
}

ReorderableRowContainer is a SortableRowContainer that can change the ordering on which the rows are sorted.

type RowContainer ¶

// RowContainer is a container for rows of Datums which tracks the approximate
// amount of memory allocated for row data.
type RowContainer struct {
	// contains filtered or unexported fields
}

RowContainer is a container for rows of Datums which tracks the approximate amount of memory allocated for row data. Rows must be added using AddRow(); once the work is done the Close() method must be called to release the allocated memory.

TODO(knz): this does not currently track the amount of memory used for the outer array of Datums references.

func NewRowContainer ¶

func NewRowContainer(acc mon.BoundAccount, ti colinfo.ColTypeInfo) *RowContainer

NewRowContainer allocates a new row container.

The acc argument indicates where to register memory allocations by this row container. Should probably be created by Session.makeBoundAccount() or Session.TxnState.makeBoundAccount().

Note that we could, but do not (yet), report the size of the row container itself to the monitor in this constructor. This is because the various planNodes are not (yet) equipped to call Close() upon encountering errors in their constructor (all nodes initializing a RowContainer there) and SetLimitHint() (for sortNode which initializes a RowContainer there). This would be rather error-prone to implement consistently and hellishly difficult to test properly. The trade-off is that very large table schemas or column selections could cause unchecked and potentially dangerous memory growth.

func NewRowContainerWithCapacity ¶

func NewRowContainerWithCapacity(
	acc mon.BoundAccount, ti colinfo.ColTypeInfo, rowCapacity int,
) *RowContainer

NewRowContainerWithCapacity is like NewRowContainer, but it accepts a rowCapacity argument.

If provided, rowCapacity indicates how many rows are to be expected. The value is used to configure the size of chunks that are allocated within the container such that if no more than the specified number of rows is added to the container, only a single chunk will be allocated and wasted space will be kept to a minimum.

func (*RowContainer) AddRow ¶

func (c *RowContainer) AddRow(ctx context.Context, row tree.Datums) (tree.Datums, error)

AddRow attempts to insert a new row in the RowContainer. The row slice is not used directly: the Datum values inside the Datums are copied to internal storage. Returns an error if the allocation was denied by the MemoryMonitor.

func (*RowContainer) At ¶

func (c *RowContainer) At(i int) tree.Datums

At accesses a row at a specific index. Note that it does *not* copy the row: callers must copy the row if they wish to mutate it.

func (*RowContainer) Clear ¶

func (c *RowContainer) Clear(ctx context.Context)

Clear resets the container and releases the associated memory. This allows the RowContainer to be reused.

func (*RowContainer) Close ¶

func (c *RowContainer) Close(ctx context.Context)

Close releases the memory associated with the RowContainer.

func (*RowContainer) Init ¶

func (c *RowContainer) Init(acc mon.BoundAccount, ti colinfo.ColTypeInfo, rowCapacity int)

Init can be used instead of NewRowContainer if we have a RowContainer that is already part of an on-heap structure.

func (*RowContainer) Len ¶

func (c *RowContainer) Len() int

Len reports the number of rows currently held in this RowContainer.

func (*RowContainer) NumCols ¶

func (c *RowContainer) NumCols() int

NumCols reports the number of columns for each row in the container.

func (*RowContainer) PopFirst ¶

func (c *RowContainer) PopFirst(ctx context.Context)

PopFirst discards the first row in the RowContainer.

func (*RowContainer) Replace ¶

func (c *RowContainer) Replace(ctx context.Context, i int, newRow tree.Datums) error

Replace substitutes one row for another. This does query the MemoryMonitor to determine whether the new row fits the allowance.

func (*RowContainer) Swap ¶

func (c *RowContainer) Swap(i, j int)

Swap exchanges two rows. Used for sorting.

func (*RowContainer) UnsafeReset ¶

func (c *RowContainer) UnsafeReset(ctx context.Context) error

UnsafeReset resets the container without releasing the associated memory. This allows the RowContainer to be reused, but keeps the previously-allocated buffers around for reuse. This is desirable if this RowContainer will be used and reset many times in the course of a computation before eventually being discarded. It's unsafe because it immediately renders all previously allocated rows unsafe - they might be overwritten without notice. This is only safe to use if it's guaranteed that all previous rows retrieved by At have been copied or otherwise not retained.

type RowIterator ¶

// RowIterator is a simple iterator over rows. Callers position it with
// Rewind() and Next(), and must check Valid() after each such call before
// reading the current row with Row().
type RowIterator interface {
	// Rewind seeks to the first row.
	Rewind()
	// Valid must be called after any call to Rewind() or Next(). It returns
	// (true, nil) if the iterator points to a valid row and (false, nil) if the
	// iterator has moved past the last row.
	// If an error has occurred, the returned bool is invalid.
	Valid() (bool, error)
	// Next advances the iterator to the next row in the iteration.
	Next()
	// Row returns the current row. The returned row is only valid until the
	// next call to Rewind() or Next().
	Row() (rowenc.EncDatumRow, error)

	// Close frees up resources held by the iterator.
	Close()
}

RowIterator is a simple iterator used to iterate over rowenc.EncDatumRow values. Example use:

var i RowIterator
// Valid() is checked after Rewind() and after every Next(), as the interface
// requires; the loop ends once the iterator has moved past the last row.
for i.Rewind(); ; i.Next() {
	if ok, err := i.Valid(); err != nil {
		// Handle error.
	} else if !ok {
		break
	}
	// The returned row is only valid until the next call to Rewind() or Next().
	row, err := i.Row()
	if err != nil {
		// Handle error.
	}
	// Do something.
}

type RowMarkerIterator ¶

// RowMarkerIterator is a RowIterator that can be used to mark rows.
type RowMarkerIterator interface {
	RowIterator
	// Reset resets this iterator to point at a bucket that matches the given
	// row. This will cause RowIterator.Rewind to rewind to the front of the
	// input row's bucket.
	Reset(ctx context.Context, row rowenc.EncDatumRow) error
	// Mark marks a row.
	// NOTE(review): presumably the row the iterator currently points at;
	// confirm against the implementations.
	Mark(ctx context.Context) error
	// IsMarked reports whether a row is marked.
	// NOTE(review): presumably the row the iterator currently points at;
	// confirm against the implementations.
	IsMarked(ctx context.Context) bool
}

RowMarkerIterator is a RowIterator that can be used to mark rows.

type SortableRowContainer ¶

// SortableRowContainer is a container used to store rows and optionally sort
// them.
type SortableRowContainer interface {
	// Len presumably reports the number of rows currently in the container.
	// NOTE(review): inferred from RowContainer.Len; confirm.
	Len() int
	// AddRow adds a row to the container. If an error is returned, then the
	// row wasn't actually added.
	AddRow(context.Context, rowenc.EncDatumRow) error
	// Sort sorts the rows according to the current ordering (the one set either
	// at initialization or by the last call of Reorder() - if the container is
	// a ReorderableRowContainer).
	Sort(context.Context)
	// NewIterator returns a RowIterator that can be used to iterate over
	// the rows.
	NewIterator(context.Context) RowIterator
	// NewFinalIterator returns a RowIterator that can be used to iterate over the
	// rows, possibly freeing resources along the way. Subsequent calls to
	// NewIterator or NewFinalIterator are not guaranteed to return any rows.
	NewFinalIterator(context.Context) RowIterator

	// UnsafeReset resets the container, allowing for reuse. It renders all
	// previously allocated rows unsafe.
	UnsafeReset(context.Context) error

	// InitTopK enables optimizations in cases where the caller cares only about
	// the top k rows where k is the size of the SortableRowContainer when
	// InitTopK is called. Once InitTopK is called, callers should not call
	// AddRow. Iterators created after calling InitTopK are guaranteed to read the
	// top k rows only.
	InitTopK()
	// MaybeReplaceMax checks whether the given row belongs in the top k rows,
	// potentially evicting a row in favor of the given row.
	MaybeReplaceMax(context.Context, rowenc.EncDatumRow) error

	// Close frees up resources held by the SortableRowContainer.
	Close(context.Context)
}

SortableRowContainer is a container used to store rows and optionally sort these.

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL